def test_knn_regressor(sagemaker_session, cpu_instance_type, training_set):
    """Train a KNN regressor, deploy it, and verify each prediction carries a score."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = KNN(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        features = training_set[0][:200]
        # The built-in KNN algorithm requires float32 training labels.
        labels = training_set[1][:200].astype("float32")
        estimator.fit(estimator.record_set(features, labels), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        knn_model = KNNModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictions = predictor.predict(training_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    """Check that building an Airflow training config uploads the record set to S3."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        # Training labels must be float32 for the built-in algorithm.
        record_set = estimator.record_set(
            train_set[0][:200], train_set[1][:200].astype("float32")
        )
        training_config = _build_airflow_workflow(
            estimator=estimator, instance_type=cpu_instance_type, inputs=record_set
        )

        s3_uri = training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    """Check that the generated Airflow training config points at data uploaded to S3."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        estimator = KNN(
            role=ROLE,
            instance_count=SINGLE_INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        mnist = datasets.one_p_mnist()
        # Training labels must be float32 for the built-in algorithm.
        record_set = estimator.record_set(mnist[0][:200], mnist[1][:200].astype("float32"))

        training_config = _build_airflow_workflow(
            estimator=estimator, instance_type=cpu_instance_type, inputs=record_set
        )

        s3_uri = training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
def test_knn_regressor(sagemaker_session, cpu_instance_type):
    """Train a KNN regressor on MNIST, deploy it, and verify scored predictions."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        # The built-in KNN algorithm requires float32 training labels.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32")),
            job_name=job_name,
        )

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        knn_model = KNNModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_knn_regressor(sagemaker_session):
    """Train a KNN regressor on MNIST, deploy it, and verify scored predictions."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
            base_job_name="test-knn-rr",
        )

        # The built-in KNN algorithm requires float32 training labels.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32"))
        )

    endpoint_name = name_from_base("knn")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        knn_model = KNNModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_async_knn_classifier(sagemaker_session):
    """Start async KNN classifier training, re-attach to the job, deploy, and verify."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
        )

        # wait=False detaches from the job; labels must be float32 for the algorithm.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32")),
            wait=False,
            job_name=job_name,
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = KNN.attach(training_job_name=job_name, sagemaker_session=sagemaker_session)
        knn_model = KNNModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, "ml.c4.xlarge", endpoint_name=job_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_async_knn_classifier(sagemaker_session):
    """Start async KNN classifier training, re-attach by job name, deploy, and verify."""
    training_job_name = ""
    endpoint_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
            base_job_name="test-knn-cl",
        )

        # wait=False detaches from the job; labels must be float32 for the algorithm.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32")),
            wait=False,
        )
        training_job_name = estimator.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        knn_model = KNNModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_async_knn_classifier(sagemaker_session, cpu_instance_type, training_set):
    """Start async KNN classifier training, re-attach to the job, deploy, and verify."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        estimator = KNN(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
        )

        features = training_set[0][:200]
        # The built-in KNN algorithm requires float32 training labels.
        labels = training_set[1][:200].astype("float32")
        # wait=False detaches from the training job immediately.
        estimator.fit(estimator.record_set(features, labels), wait=False, job_name=job_name)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = KNN.attach(training_job_name=job_name, sagemaker_session=sagemaker_session)
        knn_model = KNNModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictions = predictor.predict(training_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_knn_regressor(sagemaker_session):
    """Train a KNN regressor on MNIST, deploy it, and verify scored predictions."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
            base_job_name="test-knn-rr",
        )

        # The built-in KNN algorithm requires float32 training labels.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32"))
        )

    endpoint_name = unique_name_from_base("knn")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        knn_model = KNNModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_async_knn_classifier(sagemaker_session):
    """Start async KNN classifier training, re-attach by job name, deploy, and verify."""
    training_job_name = ""
    endpoint_name = name_from_base("knn")

    with timeout(minutes=5):
        # MNIST pickle holds (train, validation, test) splits; only train is used.
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Pickles written by Python 2 need latin1 decoding under Python 3.
        load_kwargs = {"encoding": "latin1"} if sys.version_info.major != 2 else {}
        with gzip.open(mnist_path, "rb") as gz:
            train_set, _validation, _test = pickle.load(gz, **load_kwargs)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
            base_job_name="test-knn-cl",
        )

        # wait=False detaches from the job; labels must be float32 for the algorithm.
        estimator.fit(
            estimator.record_set(train_set[0][:200], train_set[1][:200].astype("float32")),
            wait=False,
        )
        training_job_name = estimator.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        knn_model = KNNModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = knn_model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)