def test_pca(sagemaker_session):
    """Train PCA on a small MNIST slice, deploy it, and verify each prediction
    carries a non-null "projection" label."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Python 2 pickles load as-is; Python 3 must decode the payload as latin1.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {'encoding': 'latin1'}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, 'rb') as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name='test-pca',
        )
        estimator.algorithm_mode = 'randomized'
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Train on the first 100 samples only to keep the integ test quick.
        estimator.fit(estimator.record_set(training_data[0][:100]))

    endpoint_name = unique_name_from_base('pca')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )
        predictions = predictor.predict(training_data[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_pca(sagemaker_session):
    """Train PCA on a small MNIST slice, deploy to a fresh endpoint, and verify
    each prediction has a non-null "projection" label."""
    with timeout(minutes=15):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # On Python 3 the Python-2-era pickle needs an explicit latin1 encoding.
        load_kwargs = {'encoding': 'latin1'} if sys.version_info.major != 2 else {}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, 'rb') as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name='test-pca',
        )
        estimator.algorithm_mode = 'randomized'
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Only 100 samples: enough to exercise the training path quickly.
        estimator.fit(estimator.record_set(training_data[0][:100]))

    endpoint_name = name_from_base('pca')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )
        predictions = predictor.predict(training_data[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_pca(sagemaker_session, cpu_instance_type, training_set):
    """Train a network-isolated PCA job, deploy the resulting model under the
    same (unique) job name, and verify projections come back for each record."""
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Train on the first 100 samples only to keep the integ test fast.
        estimator.fit(estimator.record_set(training_set[0][:100]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )
        predictions = predictor.predict(training_set[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_async_pca():
    """Kick off a PCA training job without waiting, re-attach to it later, then
    deploy the attached estimator's model and verify projections."""
    training_job_name = ""
    endpoint_name = name_from_base('pca')
    sagemaker_session = sagemaker.Session(
        boto_session=boto3.Session(region_name=REGION))

    with timeout(minutes=20):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Python-2-era pickle needs an explicit latin1 encoding under Python 3.
        load_kwargs = {'encoding': 'latin1'} if sys.version_info.major != 2 else {}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, 'rb') as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name='test-pca',
        )
        estimator.algorithm_mode = 'randomized'
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Return immediately; the attach below resumes tracking of this job.
        estimator.fit(estimator.record_set(training_data[0][:100]), wait=False)
        training_job_name = estimator.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )
        predictions = predictor.predict(training_data[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_async_pca(sagemaker_session):
    """Start a PCA training job without waiting, re-attach by job name, then
    deploy the attached estimator's model and check projections."""
    job_name = unique_name_from_base("pca")

    with timeout(minutes=5):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Python-2-era pickle requires a latin1 encoding hint under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, "rb") as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.m4.xlarge",
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Return immediately; we re-attach to the named job further down.
        estimator.fit(
            estimator.record_set(training_data[0][:100]), wait=False, job_name=job_name
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=job_name, sagemaker_session=sagemaker_session
        )
        model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type="ml.c4.xlarge",
            endpoint_name=job_name,
        )
        predictions = predictor.predict(training_data[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_pca():
    """Train PCA on a small MNIST slice, deploy it, check projections, and
    always delete the endpoint afterwards."""
    with timeout(minutes=15):
        sagemaker_session = sagemaker.Session(
            boto_session=boto3.Session(region_name=REGION))
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Python-2-era pickle needs an explicit latin1 encoding under Python 3.
        load_kwargs = {'encoding': 'latin1'} if sys.version_info.major != 2 else {}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, 'rb') as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name='test-pca',
        )
        estimator.algorithm_mode = 'randomized'
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Train on the first 100 samples only to keep the integ test quick.
        estimator.fit(estimator.record_set(training_data[0][:100]))

    with timeout(minutes=15):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1, instance_type="ml.c4.xlarge")
        try:
            predictions = predictor.predict(training_data[0][:5])
            assert len(predictions) == 5
            for rec in predictions:
                assert rec.label["projection"] is not None
        finally:
            # Tear down the endpoint even if an assertion above fails.
            sagemaker_session.delete_endpoint(predictor.endpoint)
def test_async_pca(sagemaker_session, cpu_instance_type, training_set):
    """Launch a PCA training job without waiting, re-attach by job name, then
    deploy the attached estimator's model and verify projections."""
    job_name = unique_name_from_base("pca")

    with timeout(minutes=5):
        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name="test-pca",
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Return immediately; we re-attach to the named job further down.
        estimator.fit(
            estimator.record_set(training_set[0][:100]), wait=False, job_name=job_name
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = sagemaker.amazon.pca.PCA.attach(
            training_job_name=job_name, sagemaker_session=sagemaker_session
        )
        model = sagemaker.amazon.pca.PCAModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )
        predictions = predictor.predict(training_set[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_pca(sagemaker_session, cpu_instance_type):
    """Train PCA on a small MNIST slice under a unique job name, deploy the
    model to an endpoint of the same name, and verify projections."""
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Python-2-era pickle requires a latin1 encoding hint under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        # Load the dataset into memory as numpy arrays.
        with gzip.open(mnist_path, "rb") as gz:
            training_data, _, _ = pickle.load(gz, **load_kwargs)

        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5
        # Train on the first 100 samples only to keep the integ test quick.
        estimator.fit(estimator.record_set(training_data[0][:100]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = sagemaker.amazon.pca.PCAModel(
            model_data=estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=job_name,
        )
        predictions = predictor.predict(training_data[0][:5])
        assert len(predictions) == 5
        for rec in predictions:
            assert rec.label["projection"] is not None
def test_async_walkthrough(sagemaker_session, cpu_instance_type, training_set):
    """End-to-end walkthrough of asynchronous inference with a PCA model.

    Exercises both invocation styles of an AsyncPredictor -- inline data and an
    S3 input path -- each in fire-and-forget (predict_async) and blocking
    (predict) form, and checks the two forms agree on every projection.
    """
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
        )
        estimator.algorithm_mode = "randomized"
        estimator.subtract_mean = True
        estimator.extra_components = 5
        estimator.fit(estimator.record_set(training_set[0][:100]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        # Deploying with an AsyncInferenceConfig yields an AsyncPredictor.
        async_predictor = estimator.deploy(
            endpoint_name=job_name,
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            async_inference_config=AsyncInferenceConfig(),
        )
        assert isinstance(async_predictor, AsyncPredictor)

        sample = training_set[0][:5]

        # --- Inline data, fire-and-forget ---------------------------------
        async_response = async_predictor.predict_async(data=sample)
        assert isinstance(async_response, AsyncInferenceResponse)
        assert async_response.output_path.startswith(
            "s3://" + sagemaker_session.default_bucket()
        )
        time.sleep(5)  # give the async endpoint a moment to write its output
        inline_async_result = async_response.get_result()
        assert len(inline_async_result) == 5
        for rec in inline_async_result:
            assert rec.label["projection"] is not None

        # --- Inline data, blocking; must match the async result -----------
        inline_sync_result = async_predictor.predict(data=sample)
        assert len(inline_sync_result) == 5
        for i, rec in enumerate(inline_sync_result):
            assert rec.label["projection"] is not None
            assert rec.label["projection"] == inline_async_result[i].label["projection"]

        # --- Stage a CSV in S3 and invoke via input_path -------------------
        s3_key_prefix = os.path.join(
            "integ-test-test-async-inference",
            job_name,
        )
        input_s3_path = os.path.join(
            "s3://",
            sagemaker_session.default_bucket(),
            s3_key_prefix,
            "async-inference-pca-input.csv",
        )
        sagemaker_session.upload_data(
            path=INPUT_LOCAL_PATH,
            bucket=sagemaker_session.default_bucket(),
            key_prefix=s3_key_prefix,
            extra_args={"ContentType": "text/csv"},
        )

        # --- S3 input, fire-and-forget ------------------------------------
        s3_async_response = async_predictor.predict_async(input_path=input_s3_path)
        assert isinstance(s3_async_response, AsyncInferenceResponse)
        assert s3_async_response.output_path.startswith(
            "s3://" + sagemaker_session.default_bucket()
        )
        time.sleep(5)
        s3_async_result = s3_async_response.get_result()
        assert len(s3_async_result) == 5
        for rec in s3_async_result:
            assert rec.label["projection"] is not None

        # --- S3 input, blocking; must match the async result ---------------
        s3_sync_result = async_predictor.predict(input_path=input_s3_path)
        assert len(s3_sync_result) == 5
        for i, rec in enumerate(s3_sync_result):
            assert rec.label["projection"] is not None
            assert rec.label["projection"] == s3_async_result[i].label["projection"]