import pytest

import sagemaker.utils
import tests.integ as integ

from sagemaker.pytorch import PyTorch
from tests.integ import timeout
from tests.integ.test_pytorch import _upload_training_data

# Path to ../data/smdistributed_dataparallel, built relative to the directory
# that holds this file.
smdataparallel_dir = os.path.join(
    os.path.dirname(__file__), "..", "data", "smdistributed_dataparallel"
)


@pytest.mark.skipif(
    integ.test_region() not in integ.DATA_PARALLEL_TESTING_REGIONS,
    reason=
    "Only allow this test to run in IAD and CMH to limit usage of p3.16xlarge",
)
def test_smdataparallel_pt_mnist(
    sagemaker_session,
    pytorch_training_latest_version,
    pytorch_training_latest_py_version,
):
    job_name = sagemaker.utils.unique_name_from_base(
        "pt-sm-distributed-dataparallel")
    estimator = PyTorch(
        entry_point="mnist_pt.py",
        role="SageMakerRole",
        source_dir=smdataparallel_dir,
        instance_count=2,
# Example no. 2
# 0
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import os

import pytest

from sagemaker.huggingface import HuggingFace
from tests import integ
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout


@pytest.mark.release
@pytest.mark.skipif(
    integ.test_region() in integ.TRAINING_NO_P2_REGIONS,
    reason="no ml.p2 instances in this region",
)
def test_huggingface_training(
    sagemaker_session,
    gpu_instance_type,
    huggingface_training_latest_version,
    huggingface_pytorch_latest_version,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "huggingface")

        hf = HuggingFace(
            py_version="py36",
            entry_point="examples/text-classification/run_glue.py",
            role="SageMakerRole",
# Example no. 3
# 0
            entry_point="mnist.py",
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)


@pytest.mark.skipif(
    test_region() not in EI_SUPPORTED_REGIONS, reason="EI isn't supported in that specific region."
)
def test_deploy_model_with_accelerator(
    sagemaker_session,
    cpu_instance_type,
    pytorch_eia_latest_version,
    pytorch_eia_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
    pytorch = PyTorchModel(
        model_data,
        "SageMakerRole",
        entry_point=EIA_SCRIPT,
        framework_version=pytorch_eia_latest_version,
        py_version=pytorch_eia_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1,
                                 cpu_instance_type,
                                 endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)


@pytest.mark.skipif(PYTHON_VERSION == "py2",
                    reason="PyTorch EIA does not support Python 2.")
@pytest.mark.skipif(test_region() not in EI_SUPPORTED_REGIONS,
                    reason="EI isn't supported in that specific region.")
def test_deploy_model_with_accelerator(sagemaker_session, cpu_instance_type):
    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
    pytorch = PyTorchModel(
        model_data,
        "SageMakerRole",
        framework_version="1.3.1",
        entry_point=EIA_SCRIPT,
        sagemaker_session=sagemaker_session,
    )
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
# Example no. 5
# 0
    endpoint_name = "test-mxnet-coach-deploy-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = estimator.deploy(
            1, cpu_instance_type, entry_point="mxnet_deploy.py", endpoint_name=endpoint_name
        )

        observation = numpy.asarray([0, 0, 0, 0])
        action = predictor.predict(observation)

    assert 0 < action[0][0] < 1
    assert 0 < action[0][1] < 1


@pytest.mark.skipif(
    test_region() not in RL_SUPPORTED_REGIONS,
    reason="Updated RL images aren't in {}".format(test_region()),
)
def test_coach_tf(sagemaker_session, coach_tensorflow_latest_version, cpu_instance_type):
    """Fit the estimator returned by ``_test_coach`` for the TensorFlow RL framework.

    Skipped when ``test_region()`` is not in ``RL_SUPPORTED_REGIONS``.

    NOTE(review): ``endpoint_name`` is assigned on the last line but never used
    in the visible body -- the rest of this test appears to be cut off here;
    confirm against the complete test before relying on it.
    """
    # The helper receives the framework enum, its version and the instance type.
    estimator = _test_coach(
        sagemaker_session,
        RLFramework.TENSORFLOW,
        coach_tensorflow_latest_version,
        cpu_instance_type,
    )
    job_name = unique_name_from_base("test-coach-tf")

    # Training is bounded by a 15 minute timeout.
    with timeout(minutes=15):
        estimator.fit(job_name=job_name)

    endpoint_name = "test-tf-coach-deploy-{}".format(sagemaker_timestamp())
# Path to ../data/horovod, built relative to the directory that holds this file.
horovod_dir = os.path.join(
    os.path.dirname(__file__),
    "..",
    "data",
    "horovod",
)


@pytest.fixture(scope="module")
def gpu_instance_type(request) -> str:
    """Module-scoped fixture that always returns ``"ml.p2.xlarge"``.

    ``request`` is accepted but not read.
    """
    return "ml.p2.xlarge"


@pytest.mark.canary_quick
def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
    """Call ``__create_and_fit_estimator`` with the ``cpu_instance_type`` fixture value."""
    __create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)


@pytest.mark.canary_quick
@pytest.mark.skipif(integ.test_region() in integ.HOSTING_NO_P2_REGIONS,
                    reason="no ml.p2 instances in this region")
def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
    """Call ``__create_and_fit_estimator`` with the ``gpu_instance_type`` fixture value.

    Skipped when ``integ.test_region()`` is in ``integ.HOSTING_NO_P2_REGIONS``.
    """
    __create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)


@pytest.mark.local_mode
@pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
def test_horovod_local_mode(sagemaker_local_session, instances, processes,
                            tmpdir):
    output_path = "file://%s" % tmpdir
    job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
    estimator = TensorFlow(
        entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
        role="SageMakerRole",
        train_instance_count=2,
# Example no. 7
# 0
            sagemaker_session=sagemaker_session,
        )

        train_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"), key_prefix="integ-test-data/mxnet_mnist/train"
        )
        test_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"), key_prefix="integ-test-data/mxnet_mnist/test"
        )

        mx.fit({"train": train_input, "test": test_input})
        return mx.latest_training_job.name


@pytest.mark.skipif(
    test_region() not in EDGE_PACKAGING_SUPPORTED_REGIONS,
    reason="Edge packaging isn't supported in that specific region.",
)
def test_edge_packaging_job(mxnet_training_job, sagemaker_session):
    estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session)
    model = estimator.compile_model(
        target_instance_family="rasp3b",
        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
        output_path=estimator.output_path,
    )

    model.package_for_edge(
        output_path=estimator.output_path,
        role=estimator.role,
        model_name="sdk-test-model",
        model_version="1.0",
# Example no. 8
# 0
import sagemaker.utils
import tests.integ as integ
from sagemaker.tensorflow import TensorFlow
from tests.integ import test_region, timeout, HOSTING_NO_P3_REGIONS

# Path to ../data/horovod, built relative to the directory that holds this file.
horovod_dir = os.path.join(
    os.path.dirname(__file__),
    "..",
    "data",
    "horovod",
)


@pytest.fixture(
    scope="session",
    params=[
        # Always exercised.
        "ml.c4.xlarge",
        # Skipped when the test region is listed in HOSTING_NO_P3_REGIONS.
        pytest.param(
            "ml.p3.2xlarge",
            marks=pytest.mark.skipif(
                test_region() in HOSTING_NO_P3_REGIONS,
                reason="no ml.p3 instances in this region"),
        ),
    ],
)
def instance_type(request):
    """Session-scoped parametrized fixture returning the current parameter.

    The parameters are ``"ml.c4.xlarge"`` and ``"ml.p3.2xlarge"``; the latter
    carries a ``skipif`` mark tied to ``HOSTING_NO_P3_REGIONS``.
    """
    return request.param


@pytest.mark.canary_quick
def test_horovod(sagemaker_session, instance_type, tmpdir):
    job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
    estimator = TensorFlow(
        entry_point=os.path.join(horovod_dir, "test_hvd_basic.py"),
        role="SageMakerRole",
        train_instance_count=2,
# Example no. 9
# 0
import pytest

from sagemaker.huggingface import HuggingFace, TrainingCompilerConfig
from tests import integ
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout


@pytest.fixture(scope="module")
def gpu_instance_type(request) -> str:
    """Module-scoped fixture that always returns ``"ml.p3.2xlarge"``.

    ``request`` is accepted but not read.
    """
    return "ml.p3.2xlarge"


@pytest.mark.release
@pytest.mark.skipif(
    integ.test_region() not in integ.TRAINING_COMPILER_SUPPORTED_REGIONS,
    reason="SageMaker Training Compiler is not supported in this region",
)
@pytest.mark.skipif(
    integ.test_region() in integ.TRAINING_NO_P3_REGIONS,
    reason="no ml.p3 instances in this region",
)
def test_huggingface_pytorch(
    sagemaker_session,
    gpu_instance_type,
    huggingface_training_compiler_latest_version,
    huggingface_training_compiler_pytorch_latest_version,
):
    """Release-marked HuggingFace test gated on region support.

    Skipped when the test region is not in
    ``integ.TRAINING_COMPILER_SUPPORTED_REGIONS`` or is in
    ``integ.TRAINING_NO_P3_REGIONS``.

    NOTE(review): the visible body only computes ``data_path`` inside the
    timeout and uses none of the fixtures -- the remainder of this test
    appears to be cut off; confirm against the complete test.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # Directory named "huggingface" under DATA_DIR.
        data_path = os.path.join(DATA_DIR, "huggingface")
# Example no. 10
# 0
    sagemaker_session,
    cpu_instance_type,
    tmpdir,
):
    _create_and_fit_estimator(
        mxnet_training_latest_version,
        mxnet_training_latest_py_version,
        sagemaker_session,
        cpu_instance_type,
        tmpdir,
    )


@pytest.mark.release
@pytest.mark.skipif(
    integ.test_region() in integ.TRAINING_NO_P2_REGIONS
    and integ.test_region() in integ.TRAINING_NO_P3_REGIONS,
    reason="no ml.p2 or ml.p3 instances in this region",
)
def test_hvd_gpu(
    mxnet_training_latest_version,
    mxnet_training_latest_py_version,
    sagemaker_session,
    gpu_instance_type,
    tmpdir,
):
    _create_and_fit_estimator(
        mxnet_training_latest_version,
        mxnet_training_latest_py_version,
        sagemaker_session,
        gpu_instance_type,
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import os

import pytest

from sagemaker.huggingface import HuggingFace
from tests import integ
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout


@pytest.mark.release
@pytest.mark.skipif(
    integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
)
def test_huggingface_training(
    sagemaker_session,
    gpu_instance_type,
    huggingface_training_latest_version,
    huggingface_pytorch_version,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "huggingface")

        hf = HuggingFace(
            py_version="py36",
            entry_point="examples/text-classification/run_glue.py",
            role="SageMakerRole",
            transformers_version=huggingface_training_latest_version,
# Example no. 12
# 0
import pytest

from sagemaker.huggingface import HuggingFace, HuggingFaceProcessor
from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor
from sagemaker.utils import unique_name_from_base
from tests import integ
from tests.integ.utils import gpu_list, retry_with_instance_list
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

# Role name constant; presumably passed as the ``role`` argument by the tests
# in this module -- confirm against its usages.
ROLE = "SageMakerRole"


@pytest.mark.release
@pytest.mark.skipif(
    integ.test_region() in integ.TRAINING_NO_P2_REGIONS
    and integ.test_region() in integ.TRAINING_NO_P3_REGIONS,
    reason="no ml.p2 or ml.p3 instances in this region",
)
@retry_with_instance_list(gpu_list(integ.test_region()))
def test_framework_processing_job_with_deps(
    sagemaker_session,
    huggingface_training_latest_version,
    huggingface_training_pytorch_latest_version,
    huggingface_pytorch_latest_training_py_version,
    **kwargs,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        code_path = os.path.join(DATA_DIR, "dummy_code_bundle_with_reqs")
        entry_point = "main_script.py"