예제 #1
0
def test_marketplace_transform_job(sagemaker_session, cpu_instance_type):
    """Train a marketplace algorithm and run a batch transform job with it.

    The iris training CSV is reused as transform input: its first column is
    dropped, the remainder is written to the marketplace transform data
    directory, and that directory is uploaded before training starts.

    Args:
        sagemaker_session: Session passed to the estimator; its
            ``boto_region_name`` selects the account and algorithm ARN.
        cpu_instance_type: Instance type used for both the training job and
            the transform job.
    """
    data_path = os.path.join(DATA_DIR, "marketplace", "training")
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN.format(partition=_aws_partition(region),
                                         region=region,
                                         account=account)

    algo = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    train_input = algo.sagemaker_session.upload_data(
        path=data_path, key_prefix="integ-test-data/marketplace/train")

    # Paths are assembled with os.path.join (like data_path above) instead of
    # "/" string concatenation, so separators stay consistent on any platform.
    # Column 0 is removed before transform (presumably the label column --
    # confirm against the dataset).
    shape = pandas.read_csv(os.path.join(data_path, "iris.csv"),
                            header=None).drop([0], axis=1)

    transform_workdir = os.path.join(DATA_DIR, "marketplace", "transform")
    shape.to_csv(os.path.join(transform_workdir, "batchtransform_test.csv"),
                 index=False,
                 header=False)
    transform_input = algo.sagemaker_session.upload_data(
        transform_workdir, key_prefix="integ-test-data/marketplace/transform")

    algo.fit({"training": train_input})

    transformer = algo.transformer(1, cpu_instance_type)
    transformer.transform(transform_input, content_type="text/csv")
    transformer.wait()
예제 #2
0
def test_marketplace_estimator(sagemaker_session):
    """Fit a marketplace algorithm, deploy it and print its predictions.

    Training runs inside a 15-minute ``timeout`` context. Deployment and
    prediction run inside ``timeout_and_delete_endpoint_by_name`` with a
    20-minute limit.

    Args:
        sagemaker_session: Session passed to the estimator; its
            ``boto_region_name`` selects the account and algorithm ARN.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'marketplace', 'training')
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]

        algo = AlgorithmEstimator(
            algorithm_arn=ALGORITHM_ARN % (region, account_id),
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session)

        s3_train = algo.sagemaker_session.upload_data(
            path=data_path,
            key_prefix='integ-test-data/marketplace/train')
        channels = {'training': s3_train}
        algo.fit(channels)

    endpoint_name = 'test-marketplace-estimator{}'.format(
        sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=20):
        predictor = algo.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        iris = pandas.read_csv(os.path.join(data_path, 'iris.csv'),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        row_ids = [
            base + offset
            for base in (0, 50, 100)
            for offset in range(40, 50)
        ]

        # The last position is left out; every column but the first is sent.
        sample = iris.iloc[row_ids[:-1]]
        features = sample.iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode('utf-8'))
예제 #3
0
def test_marketplace_transform_job(sagemaker_session):
    """Train a marketplace algorithm and run a batch transform job with it.

    The iris training CSV is reused as transform input: its first column is
    dropped, the remainder is written to the marketplace transform data
    directory, and that directory is uploaded before training starts.

    Args:
        sagemaker_session: Session passed to the estimator; its
            ``boto_region_name`` selects the account and algorithm ARN.
    """
    data_path = os.path.join(DATA_DIR, 'marketplace', 'training')
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN % (region, account)

    algo = AlgorithmEstimator(algorithm_arn=algorithm_arn,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-marketplace')

    train_input = algo.sagemaker_session.upload_data(
        path=data_path, key_prefix='integ-test-data/marketplace/train')

    # Paths are assembled with os.path.join (like data_path above) instead of
    # '/' string concatenation, so separators stay consistent on any platform.
    # Column 0 is removed before transform (presumably the label column --
    # confirm against the dataset).
    shape = pandas.read_csv(os.path.join(data_path, 'iris.csv'),
                            header=None).drop([0], axis=1)

    transform_workdir = os.path.join(DATA_DIR, 'marketplace', 'transform')
    shape.to_csv(os.path.join(transform_workdir, 'batchtransform_test.csv'),
                 index=False,
                 header=False)
    transform_input = algo.sagemaker_session.upload_data(
        transform_workdir, key_prefix='integ-test-data/marketplace/transform')

    algo.fit({'training': train_input})

    transformer = algo.transformer(1, 'ml.m4.xlarge')
    transformer.transform(transform_input, content_type='text/csv')
    transformer.wait()
예제 #4
0
def test_marketplace_attach(sagemaker_session, cpu_instance_type):
    """Start a marketplace training job, re-attach to it by name and deploy.

    Training is started with ``wait=False``. After a 20-second pause a new
    estimator is obtained through ``AlgorithmEstimator.attach`` using the
    training job name, deployed with the CSV serializer, and its predictions
    for a sample of iris rows are printed.

    Args:
        sagemaker_session: Session passed to both estimators; its
            ``boto_region_name`` selects the account and algorithm ARN.
        cpu_instance_type: Instance type used for training and for the
            endpoint.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]
        arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=account_id,
        )

        mktplace = AlgorithmEstimator(
            algorithm_arn=arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            base_job_name="test-marketplace",
        )

        s3_train = mktplace.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )

        # Start training without blocking so the job can be re-attached later.
        mktplace.fit({"training": s3_train}, wait=False)
        training_job_name = mktplace.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" %
              training_job_name)
        time.sleep(20)
        endpoint_name = "test-marketplace-estimator{}".format(
            sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=20):
        print("Re-attaching now to: %s" % training_job_name)
        attached = AlgorithmEstimator.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        predictor = attached.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=sagemaker.predictor.csv_serializer,
        )
        iris = pandas.read_csv(os.path.join(data_path, "iris.csv"),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        row_ids = [
            base + offset
            for base in (0, 50, 100)
            for offset in range(40, 50)
        ]

        # The last position is left out; every column but the first is sent.
        sample = iris.iloc[row_ids[:-1]]
        features = sample.iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))
예제 #5
0
def test_marketplace_attach(sagemaker_session):
    """Start a marketplace training job, re-attach to it by name and deploy.

    Training is started with ``wait=False``. After a 20-second pause a new
    estimator is obtained through ``AlgorithmEstimator.attach`` using the
    training job name, deployed with the CSV serializer, and its predictions
    for a sample of iris rows are printed.

    Args:
        sagemaker_session: Session passed to both estimators; its
            ``boto_region_name`` selects the account and algorithm ARN.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'marketplace', 'training')
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]

        mktplace = AlgorithmEstimator(
            algorithm_arn=ALGORITHM_ARN % (region, account_id),
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-marketplace')

        s3_train = mktplace.sagemaker_session.upload_data(
            path=data_path,
            key_prefix='integ-test-data/marketplace/train')

        # Start training without blocking so the job can be re-attached later.
        mktplace.fit({'training': s3_train}, wait=False)
        training_job_name = mktplace.latest_training_job.name

        print('Waiting to re-attach to the training job: %s' %
              training_job_name)
        time.sleep(20)
        endpoint_name = 'test-marketplace-estimator{}'.format(
            sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=20):
        print('Re-attaching now to: %s' % training_job_name)
        attached = AlgorithmEstimator.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session)
        predictor = attached.deploy(
            1,
            'ml.m4.xlarge',
            endpoint_name=endpoint_name,
            serializer=sagemaker.predictor.csv_serializer)
        iris = pandas.read_csv(os.path.join(data_path, 'iris.csv'),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        row_ids = [
            base + offset
            for base in (0, 50, 100)
            for offset in range(40, 50)
        ]

        # The last position is left out; every column but the first is sent.
        sample = iris.iloc[row_ids[:-1]]
        features = sample.iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode('utf-8'))
예제 #6
0
def test_marketplace_estimator(sagemaker_session, cpu_instance_type):
    """Fit a marketplace algorithm, deploy it and print its predictions.

    Training runs inside a 15-minute ``timeout`` context. Deployment and
    prediction run inside ``timeout_and_delete_endpoint_by_name`` with a
    20-minute limit.

    Args:
        sagemaker_session: Session passed to the estimator; its
            ``boto_region_name`` selects the account and algorithm ARN.
        cpu_instance_type: Instance type used for training and for the
            endpoint.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]
        arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=account_id,
        )

        algo = AlgorithmEstimator(
            algorithm_arn=arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        s3_train = algo.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )
        channels = {"training": s3_train}
        algo.fit(channels)

    endpoint_name = "test-marketplace-estimator{}".format(
        sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=20):
        predictor = algo.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
        )
        iris = pandas.read_csv(os.path.join(data_path, "iris.csv"),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        row_ids = [
            base + offset
            for base in (0, 50, 100)
            for offset in range(40, 50)
        ]

        # The last position is left out; every column but the first is sent.
        sample = iris.iloc[row_ids[:-1]]
        features = sample.iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))