Example #1
0
def test_knn_regressor(sagemaker_session, cpu_instance_type, training_set):
    """Train a KNN regressor, deploy it behind an endpoint, and spot-check scores."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = KNN(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        # KNN requires training labels to be float32.
        labels = training_set[1][:200].astype("float32")
        estimator.fit(
            estimator.record_set(training_set[0][:200], labels),
            job_name=job_name,
        )

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = KNNModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        result = predictor.predict(training_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    """Build an Airflow training config for KNN and verify its S3 input holds data."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        knn_estimator = KNN(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        # KNN requires float32 labels.
        labels = train_set[1][:200].astype("float32")
        records = knn_estimator.record_set(train_set[0][:200], labels)

        training_config = _build_airflow_workflow(
            estimator=knn_estimator,
            instance_type=cpu_instance_type,
            inputs=records,
        )

        s3_uri = training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session,
                                                      cpu_instance_type):
    """Build an Airflow training config for KNN and verify its S3 input holds data."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        knn_estimator = KNN(
            role=ROLE,
            instance_count=SINGLE_INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        training_set = datasets.one_p_mnist()
        # KNN requires float32 labels.
        labels = training_set[1][:200].astype("float32")
        records = knn_estimator.record_set(training_set[0][:200], labels)

        training_config = _build_airflow_workflow(
            estimator=knn_estimator,
            instance_type=cpu_instance_type,
            inputs=records,
        )

        s3_uri = (
            training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"]
        )
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
def test_knn_regressor(sagemaker_session, cpu_instance_type):
    """Train a KNN regressor on MNIST, deploy it, and spot-check predictions."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            k=10,
            predictor_type="regressor",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )

        # KNN requires training labels to be float32.
        labels = train_set[1][:200].astype("float32")
        estimator.fit(estimator.record_set(train_set[0][:200], labels), job_name=job_name)

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = KNNModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_knn_regressor(sagemaker_session):
    """Train a KNN regressor on MNIST, deploy it, and spot-check predictions."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            k=10,
            predictor_type='regressor',
            sample_size=500,
            sagemaker_session=sagemaker_session,
            base_job_name='test-knn-rr',
        )

        # KNN requires training labels to be float32.
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(train_set[0][:200], labels))

    endpoint_name = name_from_base('knn')
    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = KNNModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_knn_regressor(sagemaker_session):
    """Train a KNN regressor on MNIST, deploy it, and spot-check predictions."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            k=10,
            predictor_type='regressor',
            sample_size=500,
            sagemaker_session=sagemaker_session,
            base_job_name='test-knn-rr',
        )

        # KNN requires training labels to be float32.
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(train_set[0][:200], labels))

    endpoint_name = unique_name_from_base('knn')
    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = KNNModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_async_knn_classifier(sagemaker_session):
    """Start KNN classifier training detached, re-attach, deploy, and predict."""
    training_job_name = ""
    endpoint_name = name_from_base('knn')

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            k=10,
            predictor_type='classifier',
            sample_size=500,
            index_type='faiss.IVFFlat',
            index_metric='L2',
            sagemaker_session=sagemaker_session,
            base_job_name='test-knn-cl',
        )

        # KNN requires float32 labels; wait=False detaches from the training job.
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(train_set[0][:200], labels), wait=False)
        training_job_name = estimator.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        model = KNNModel(
            attached.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_async_knn_classifier(sagemaker_session):
    """Start KNN classifier training detached, re-attach by name, deploy, predict."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
        )

        # KNN requires float32 labels; wait=False detaches from the training job.
        labels = train_set[1][:200].astype("float32")
        estimator.fit(
            estimator.record_set(train_set[0][:200], labels),
            wait=False,
            job_name=job_name,
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        model = KNNModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, "ml.c4.xlarge", endpoint_name=job_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
def test_async_knn_classifier(sagemaker_session):
    """Start KNN classifier training detached, re-attach, deploy, and predict."""
    training_job_name = ""
    endpoint_name = unique_name_from_base('knn')

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # The MNIST pickle was written by Python 2; Python 3 needs latin1 decoding.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the training split into memory as numpy arrays.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        estimator = KNN(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            k=10,
            predictor_type='classifier',
            sample_size=500,
            index_type='faiss.IVFFlat',
            index_metric='L2',
            sagemaker_session=sagemaker_session,
            base_job_name='test-knn-cl',
        )

        # KNN requires float32 labels; wait=False detaches from the training job.
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(train_set[0][:200], labels), wait=False)
        training_job_name = estimator.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        model = KNNModel(
            attached.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None
Example #10
0
def test_async_knn_classifier(sagemaker_session, cpu_instance_type,
                              training_set):
    """Start KNN classifier training detached, re-attach by name, deploy, predict."""
    job_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        estimator = KNN(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
        )

        # KNN requires float32 labels; wait=False detaches from the training job.
        labels = training_set[1][:200].astype("float32")
        estimator.fit(
            estimator.record_set(training_set[0][:200], labels),
            wait=False,
            job_name=job_name,
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        model = KNNModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        result = predictor.predict(training_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label["score"] is not None