コード例 #1
0
def test_pca(sagemaker_session):
    """Train PCA on a slice of the MNIST fixture, deploy it, and check predictions.

    Fits on the first 100 training records, deploys the trained model to an
    endpoint named via ``unique_name_from_base``, and asserts that predicting
    on 5 records returns 5 results that each carry a ``projection`` label.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as fh:
            train_set, _, _ = pickle.load(fh, **load_kwargs)
        features = train_set[0]

        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5

        training_records = estimator.record_set(features[:100])
        estimator.fit(training_records)

    endpoint_name = unique_name_from_base("pca")
    # The context manager receives the endpoint name so it can remove the
    # endpoint when the block exits (per its name; defined outside this view).
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )

        predictions = predictor.predict(features[:5])

        assert len(predictions) == 5
        for prediction in predictions:
            assert prediction.label["projection"] is not None
コード例 #2
0
def test_pca(sagemaker_session):
    """Fit, deploy and query a PCA model using the bundled MNIST pickle.

    Training uses the first 100 records and a 15-minute timeout; the deployed
    endpoint is then asked to project 5 records, and each returned record must
    expose a non-``None`` ``projection`` label.
    """
    with timeout(minutes=15):
        pickle_file = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # ``encoding`` is passed to pickle.load only when not running on Python 2.
        running_py2 = sys.version_info.major == 2
        unpickle_options = {} if running_py2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(pickle_file, "rb") as stream:
            training_split, _, _ = pickle.load(stream, **unpickle_options)

        pca_estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )

        # Hyperparameters are set as attributes after construction.
        pca_estimator.algorithm_mode = "randomized"
        pca_estimator.subtract_mean = True
        pca_estimator.extra_components = 5

        fit_records = pca_estimator.record_set(training_split[0][:100])
        pca_estimator.fit(fit_records)

    endpoint_name = name_from_base("pca")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        trained_model = sagemaker.amazon.pca.PCAModel(
            model_data=pca_estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        endpoint_predictor = trained_model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )

        query = training_split[0][:5]
        response = endpoint_predictor.predict(query)

        assert len(response) == 5
        for item in response:
            assert item.label["projection"] is not None
コード例 #3
0
def test_pca(sagemaker_session, cpu_instance_type, training_set):
    """Train and deploy PCA with network isolation enabled, then predict.

    The training job and the endpoint share one name generated by
    ``unique_name_from_base``. Training uses the first 100 records of
    ``training_set[0]``; prediction uses the first 5 and must return 5 records
    that each carry a ``projection`` label.
    """
    job_name = unique_name_from_base("pca")
    features = training_set[0]

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5

        fit_records = estimator.record_set(features[:100])
        estimator.fit(fit_records, job_name=job_name)

    # The endpoint reuses the training job's name.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )

        projections = predictor.predict(features[:5])

        assert len(projections) == 5
        for projected in projections:
            assert projected.label["projection"] is not None
コード例 #4
0
def test_async_pca():
    """Start PCA training without waiting, re-attach to the job, deploy, predict.

    Builds its own SageMaker session for ``REGION``, launches training with
    ``wait=False``, sleeps 20 seconds, then attaches to the training job by
    name and deploys the attached estimator's model data. Predicting on 5
    records must return 5 records with a ``projection`` label.
    """
    training_job_name = ""
    endpoint_name = name_from_base("pca")
    boto_session = boto3.Session(region_name=REGION)
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    with timeout(minutes=20):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as fh:
            train_set, _, _ = pickle.load(fh, **load_kwargs)

        trainer = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        trainer.algorithm_mode = "randomized"
        trainer.subtract_mean = True
        trainer.extra_components = 5

        # wait=False: fit() is asked not to block on the training job.
        fit_records = trainer.record_set(train_set[0][:100])
        trainer.fit(fit_records, wait=False)
        training_job_name = trainer.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(
        endpoint_name, sagemaker_session, minutes=20
    ):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )

        pca_model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = pca_model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )

        predictions = predictor.predict(train_set[0][:5])

        assert len(predictions) == 5
        for prediction in predictions:
            assert prediction.label["projection"] is not None
コード例 #5
0
def test_async_pca(sagemaker_session):
    """Launch PCA training detached, re-attach by job name, deploy and predict.

    One name from ``unique_name_from_base`` is used for both the training job
    and the endpoint. Training is started with ``wait=False`` on the first 100
    MNIST records; after a 20-second sleep the job is attached to and its
    model deployed. Predicting on 5 records must return 5 records with a
    ``projection`` label.
    """
    job_name = unique_name_from_base("pca")

    with timeout(minutes=5):
        pickle_file = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # ``encoding`` is passed to pickle.load only when not running on Python 2.
        running_py2 = sys.version_info.major == 2
        unpickle_options = {} if running_py2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(pickle_file, "rb") as stream:
            training_split, _, _ = pickle.load(stream, **unpickle_options)

        trainer = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        trainer.algorithm_mode = "randomized"
        trainer.subtract_mean = True
        trainer.extra_components = 5

        # wait=False: fit() is asked not to block on the training job.
        fit_records = trainer.record_set(training_split[0][:100])
        trainer.fit(fit_records, wait=False, job_name=job_name)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )

        pca_model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = pca_model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=job_name,
        )

        response = predictor.predict(training_split[0][:5])

        assert len(response) == 5
        for item in response:
            assert item.label["projection"] is not None
コード例 #6
0
def test_pca():
    """Train PCA on MNIST, deploy it, predict, and always delete the endpoint.

    Creates its own SageMaker session for ``REGION``. After deployment the
    prediction checks run inside ``try``/``finally`` so the endpoint is deleted
    whether or not the assertions pass.
    """
    with timeout(minutes=15):
        boto_session = boto3.Session(region_name=REGION)
        sagemaker_session = sagemaker.Session(boto_session=boto_session)
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as fh:
            train_set, _, _ = pickle.load(fh, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5

        fit_records = estimator.record_set(train_set[0][:100])
        estimator.fit(fit_records)

    with timeout(minutes=15):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        # No endpoint name is passed here; the name is read back from the
        # predictor for cleanup below.
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
        )

        try:
            predictions = predictor.predict(train_set[0][:5])

            assert len(predictions) == 5
            for prediction in predictions:
                assert prediction.label["projection"] is not None
        finally:
            sagemaker_session.delete_endpoint(predictor.endpoint)
コード例 #7
0
def test_async_pca(sagemaker_session, cpu_instance_type, training_set):
    """Start PCA training detached, re-attach by job name, deploy and predict.

    The training job and endpoint share one name from
    ``unique_name_from_base``. Training runs with ``wait=False`` on the first
    100 records of ``training_set[0]``; after a 20-second sleep the job is
    attached to and its model deployed. Predicting on 5 records must return 5
    records with a ``projection`` label.
    """
    job_name = unique_name_from_base("pca")
    features = training_set[0]

    with timeout(minutes=5):
        trainer = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        trainer.algorithm_mode = "randomized"
        trainer.subtract_mean = True
        trainer.extra_components = 5

        # wait=False: fit() is asked not to block on the training job.
        fit_records = trainer.record_set(features[:100])
        trainer.fit(fit_records, wait=False, job_name=job_name)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )

        pca_model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = pca_model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )

        projections = predictor.predict(features[:5])

        assert len(projections) == 5
        for projected in projections:
            assert projected.label["projection"] is not None
コード例 #8
0
def test_pca(sagemaker_session, cpu_instance_type):
    """Train PCA on the MNIST fixture with an explicit job name, deploy, predict.

    The name from ``unique_name_from_base`` is used for both the training job
    and the endpoint. Training uses the first 100 records; predicting on 5
    records must return 5 records that each carry a ``projection`` label.
    """
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        pickle_file = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # ``encoding`` is passed to pickle.load only when not running on Python 2.
        running_py2 = sys.version_info.major == 2
        unpickle_options = {} if running_py2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(pickle_file, "rb") as stream:
            training_split, _, _ = pickle.load(stream, **unpickle_options)

        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5

        fit_records = estimator.record_set(training_split[0][:100])
        estimator.fit(fit_records, job_name=job_name)

    # The endpoint reuses the training job's name.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )

        response = predictor.predict(training_split[0][:5])

        assert len(response) == 5
        for item in response:
            assert item.label["projection"] is not None
コード例 #9
0
def test_async_walkthrough(sagemaker_session, cpu_instance_type, training_set):
    """Exercise an async-inference PCA endpoint with in-memory and S3 inputs.

    Trains PCA on the first 100 records of ``training_set[0]``, deploys it with
    an ``AsyncInferenceConfig``, and then checks four invocation paths:

    1. ``predict_async(data=...)`` followed by ``get_result()``
    2. ``predict(data=...)``
    3. ``predict_async(input_path=...)`` followed by ``get_result()``
    4. ``predict(input_path=...)``

    Every path must return 5 records carrying a ``projection`` label, and each
    ``predict`` result must match the corresponding ``predict_async`` result
    record by record.
    """
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        pca = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
        )

        pca.algorithm_mode = "randomized"
        pca.subtract_mean = True
        pca.extra_components = 5
        pca.fit(pca.record_set(training_set[0][:100]), job_name=job_name)

    # The endpoint reuses the training job's name.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor_async = pca.deploy(
            endpoint_name=job_name,
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            async_inference_config=AsyncInferenceConfig(),
        )
        assert isinstance(predictor_async, AsyncPredictor)

        # Look the bucket up once; it is used for every S3 location below.
        bucket = sagemaker_session.default_bucket()
        bucket_uri = "s3://" + bucket

        # --- in-memory payload ------------------------------------------------
        data = training_set[0][:5]
        result_no_wait_with_data = predictor_async.predict_async(data=data)
        assert isinstance(result_no_wait_with_data, AsyncInferenceResponse)
        assert result_no_wait_with_data.output_path.startswith(bucket_uri)
        # NOTE(review): fixed pause before fetching the result; presumably long
        # enough for the async output to be written - confirm.
        time.sleep(5)
        result_no_wait_with_data = result_no_wait_with_data.get_result()
        assert len(result_no_wait_with_data) == 5
        for record in result_no_wait_with_data:
            assert record.label["projection"] is not None

        result_wait_with_data = predictor_async.predict(data=data)
        assert len(result_wait_with_data) == 5
        for idx, record in enumerate(result_wait_with_data):
            assert record.label["projection"] is not None
            assert record.label["projection"] == result_no_wait_with_data[idx].label["projection"]

        # --- payload already in S3 --------------------------------------------
        # S3 keys and URIs always use "/" as the separator. os.path.join would
        # insert the OS separator (a backslash on Windows) and produce a
        # key/URI that does not match the uploaded object.
        s3_key_prefix = "/".join(("integ-test-test-async-inference", job_name))

        # NOTE(review): assumes the basename of INPUT_LOCAL_PATH is
        # "async-inference-pca-input.csv", so this URI points at the object
        # uploaded below - confirm against the constant's definition.
        input_s3_path = "/".join(
            (bucket_uri, s3_key_prefix, "async-inference-pca-input.csv")
        )

        sagemaker_session.upload_data(
            path=INPUT_LOCAL_PATH,
            bucket=bucket,
            key_prefix=s3_key_prefix,
            extra_args={"ContentType": "text/csv"},
        )

        result_not_wait = predictor_async.predict_async(input_path=input_s3_path)
        assert isinstance(result_not_wait, AsyncInferenceResponse)
        assert result_not_wait.output_path.startswith(bucket_uri)
        time.sleep(5)
        result_not_wait = result_not_wait.get_result()
        assert len(result_not_wait) == 5
        for record in result_not_wait:
            assert record.label["projection"] is not None

        result_wait = predictor_async.predict(input_path=input_s3_path)
        assert len(result_wait) == 5
        for idx, record in enumerate(result_wait):
            assert record.label["projection"] is not None
            assert record.label["projection"] == result_not_wait[idx].label["projection"]