Ejemplo n.º 1
0
def test_marketplace_transform_job_from_model_package(sagemaker_session,
                                                      cpu_instance_type):
    """Run a batch transform job backed by a Marketplace model package.

    The iris training CSV is read, column 0 is dropped, and the remaining
    columns are written (no index, no header) into the transform work
    directory, which is then uploaded to S3 and used as transform input.

    Args:
        sagemaker_session: session used for the upload and the model.
        cpu_instance_type: instance type passed to ``model.transformer``.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")
    transform_dir = DATA_DIR + "/marketplace/transform"

    # Column 0 is removed before batch inference
    # (presumably it is the label column -- confirm against the dataset).
    iris = pandas.read_csv(training_dir + "/iris.csv", header=None)
    features = iris.drop([0], axis=1)
    features.to_csv(
        transform_dir + "/batchtransform_test.csv",
        index=False,
        header=False,
    )

    transform_input = sagemaker_session.upload_data(
        transform_dir,
        key_prefix="integ-test-data/marketplace/transform",
    )

    # Build the model package ARN for the session's region.
    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=_aws_partition(region),
        region=region,
        account=account_id,
    )

    package_model = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    batch_transformer = package_model.transformer(1, cpu_instance_type)
    batch_transformer.transform(transform_input, content_type="text/csv")
    batch_transformer.wait()
def test_marketplace_model(sagemaker_session, cpu_instance_type):
    """Deploy a Marketplace model package to an endpoint and invoke it.

    A subset of rows from the iris CSV (all columns except the first) is
    sent to the deployed endpoint and the decoded response is printed.

    Args:
        sagemaker_session: session used to create the model and endpoint.
        cpu_instance_type: instance type passed to ``model.deploy``.
    """
    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=_aws_partition(region), region=region, account=account_id
    )

    def build_csv_predictor(endpoint, session):
        # Predictor factory handed to ModelPackage; serializes requests as CSV.
        return sagemaker.Predictor(endpoint, session, serializer=CSVSerializer())

    package_model = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
        predictor_cls=build_csv_predictor,
    )

    endpoint_name = "test-marketplace-model-endpoint{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        predictor = package_model.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name
        )

        training_dir = os.path.join(DATA_DIR, "marketplace", "training")
        iris = pandas.read_csv(os.path.join(training_dir, "iris.csv"), header=None)

        # Row positions 40-49, 90-99 and 140-149; the final one (149) is
        # dropped by the [:-1] slice below.
        block_starts = [50 * k for k in range(3)]
        block_offsets = [40 + k for k in range(10)]
        row_positions = [
            start + offset
            for start, offset in itertools.product(block_starts, block_offsets)
        ]

        sample_rows = iris.iloc[row_positions[:-1]]
        sample_features = sample_rows.iloc[:, 1:]

        response = predictor.predict(sample_features.values)
        print(response.decode("utf-8"))
Ejemplo n.º 3
0
def test_marketplace_transform_job(sagemaker_session, cpu_instance_type):
    """Train a Marketplace algorithm, then run a batch transform with it.

    Training data is uploaded from the marketplace training directory. The
    transform input is the iris CSV with column 0 dropped, written without
    index or header into the transform work directory and uploaded to S3.

    Args:
        sagemaker_session: session used by the estimator and for uploads.
        cpu_instance_type: instance type for both training and transform.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")

    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    algo_arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region),
        region=region,
        account=account_id,
    )

    estimator = AlgorithmEstimator(
        algorithm_arn=algo_arn,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )
    session = estimator.sagemaker_session

    train_input = session.upload_data(
        path=training_dir,
        key_prefix="integ-test-data/marketplace/train",
    )

    # Build the transform input: the training CSV minus column 0
    # (presumably the label column -- confirm against the dataset).
    iris = pandas.read_csv(training_dir + "/iris.csv", header=None)
    features = iris.drop([0], axis=1)

    transform_dir = DATA_DIR + "/marketplace/transform"
    features.to_csv(
        transform_dir + "/batchtransform_test.csv",
        index=False,
        header=False,
    )
    transform_input = session.upload_data(
        transform_dir,
        key_prefix="integ-test-data/marketplace/transform",
    )

    estimator.fit({"training": train_input})

    batch_transformer = estimator.transformer(1, cpu_instance_type)
    batch_transformer.transform(transform_input, content_type="text/csv")
    batch_transformer.wait()
Ejemplo n.º 4
0
def _create_kms_key(kms_client,
                    account_id,
                    region,
                    role_arn=None,
                    sagemaker_role="SageMakerRole",
                    alias=KEY_ALIAS):
    """Create a KMS key for the integ tests and return its ARN.

    Args:
        kms_client: client object exposing ``create_key`` and ``create_alias``.
        account_id: account id used in the key policy principal.
        region: region used to resolve the partition for the principal.
        role_arn: when truthy, the principal is rendered from
            ``PRINCIPAL_TEMPLATE``; otherwise the principal is just the
            quoted account id.
        sagemaker_role: role name substituted into the policy templates.
        alias: when truthy, an alias ``alias/<alias>`` is created that
            points at the new key.

    Returns:
        The ARN of the newly created key.
    """
    if not role_arn:
        principal = '"{account_id}"'.format(account_id=account_id)
    else:
        principal = PRINCIPAL_TEMPLATE.format(
            partition=utils._aws_partition(region),
            account_id=account_id,
            role_arn=role_arn,
            sagemaker_role=sagemaker_role,
        )

    key_policy = KEY_POLICY.format(
        id=POLICY_NAME,
        principal=principal,
        sagemaker_role=sagemaker_role,
    )
    created = kms_client.create_key(
        Policy=key_policy,
        Description="KMS key for SageMaker Python SDK integ tests",
    )
    new_key_arn = created["KeyMetadata"]["Arn"]

    if alias:
        alias_name = "alias/" + alias
        kms_client.create_alias(AliasName=alias_name, TargetKeyId=new_key_arn)

    return new_key_arn
Ejemplo n.º 5
0
def bucket_with_encryption(sagemaker_session, sagemaker_role):
    """Yield a KMS-encrypted S3 bucket URI together with its KMS key ARN.

    A new KMS key (without alias) is created, the bucket
    ``sagemaker-<region>-<account>-with-kms`` is created if missing, the key
    is installed as the bucket's default server-side encryption key, and
    ``KMS_BUCKET_POLICY`` is applied to the bucket. Once the consumer is
    done -- or if anything after key creation fails -- the key is scheduled
    for deletion with a 7 day pending window.

    NOTE(review): this is a generator; it is presumably wrapped by a
    context-manager or fixture decorator that is not visible in this chunk.

    Args:
        sagemaker_session: session providing ``boto_session`` and the
            bucket-creation helper.
        sagemaker_role: role name forwarded to ``_create_kms_key``.

    Yields:
        A tuple ``("s3://<bucket_name>", kms_key_arn)``.
    """
    boto_session = sagemaker_session.boto_session
    region = boto_session.region_name
    sts_client = boto_session.client(
        "sts",
        region_name=region,
        endpoint_url=utils.sts_regional_endpoint(region))

    # One STS round trip provides both the account id and the caller ARN.
    caller_identity = sts_client.get_caller_identity()
    account = caller_identity["Account"]
    role_arn = caller_identity["Arn"]

    kms_client = boto_session.client("kms")
    kms_key_arn = _create_kms_key(kms_client, account, region, role_arn,
                                  sagemaker_role, None)

    # From here on the key exists, so make sure it is always scheduled for
    # deletion, even when bucket setup or the consumer raises.
    try:
        bucket_name = "sagemaker-{}-{}-with-kms".format(region, account)

        sagemaker_session._create_s3_bucket_if_it_does_not_exist(
            bucket_name=bucket_name, region=region)

        s3_client = boto_session.client("s3", region_name=region)
        s3_client.put_bucket_encryption(
            Bucket=bucket_name,
            ServerSideEncryptionConfiguration={
                "Rules": [{
                    "ApplyServerSideEncryptionByDefault": {
                        # "aws:kms" is a fixed S3 enum value, not an ARN
                        # prefix; it must not vary with the partition.
                        "SSEAlgorithm": "aws:kms",
                        "KMSMasterKeyID": kms_key_arn,
                    }
                }]
            },
        )

        s3_client.put_bucket_policy(
            Bucket=bucket_name,
            Policy=KMS_BUCKET_POLICY.format(
                partition=utils._aws_partition(region),
                bucket_name=bucket_name),
        )

        yield "s3://" + bucket_name, kms_key_arn
    finally:
        kms_client.schedule_key_deletion(KeyId=kms_key_arn,
                                         PendingWindowInDays=7)
Ejemplo n.º 6
0
def test_marketplace_attach(sagemaker_session, cpu_instance_type):
    """Start Marketplace training, re-attach to the job, deploy and predict.

    Training is started without waiting; after a short sleep the test
    attaches to the job by name, deploys the attached estimator, and prints
    the decoded prediction for a subset of iris rows.

    Args:
        sagemaker_session: session used for training, attach and deploy.
        cpu_instance_type: instance type for training and the endpoint.
    """
    with timeout(minutes=15):
        training_dir = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]
        algo_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=account_id,
        )

        original_estimator = AlgorithmEstimator(
            algorithm_arn=algo_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            base_job_name="test-marketplace",
        )

        train_input = original_estimator.sagemaker_session.upload_data(
            path=training_dir,
            key_prefix="integ-test-data/marketplace/train",
        )

        # Fire-and-forget: the job is picked up again via attach() below.
        original_estimator.fit({"training": train_input}, wait=False)
        job_name = original_estimator.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % job_name)
        time.sleep(20)
        endpoint_name = "test-marketplace-estimator{}".format(
            sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session, minutes=20):
        print("Re-attaching now to: %s" % job_name)
        attached_estimator = AlgorithmEstimator.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        predictor = attached_estimator.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=sagemaker.predictor.csv_serializer,
        )

        iris = pandas.read_csv(os.path.join(training_dir, "iris.csv"),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149; the final one (149) is
        # dropped by the [:-1] slice below.
        block_starts = [50 * k for k in range(3)]
        block_offsets = [40 + k for k in range(10)]
        row_positions = [
            start + offset
            for start, offset in itertools.product(block_starts, block_offsets)
        ]

        sample_rows = iris.iloc[row_positions[:-1]]
        sample_features = sample_rows.iloc[:, 1:]

        response = predictor.predict(sample_features.values)
        print(response.decode("utf-8"))
Ejemplo n.º 7
0
def test_marketplace_estimator(sagemaker_session, cpu_instance_type):
    """Train a Marketplace algorithm, deploy it and run a prediction.

    Args:
        sagemaker_session: session used for training and deployment.
        cpu_instance_type: instance type for training and the endpoint.
    """
    with timeout(minutes=15):
        training_dir = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account_id = REGION_ACCOUNT_MAP[region]
        algo_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=account_id,
        )

        estimator = AlgorithmEstimator(
            algorithm_arn=algo_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=training_dir,
            key_prefix="integ-test-data/marketplace/train",
        )

        estimator.fit({"training": train_input})

    endpoint_name = "test-marketplace-estimator{}".format(
        sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session, minutes=20):
        predictor = estimator.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name)

        iris = pandas.read_csv(os.path.join(training_dir, "iris.csv"),
                               header=None)

        # Row positions 40-49, 90-99 and 140-149; the final one (149) is
        # dropped by the [:-1] slice below.
        block_starts = [50 * k for k in range(3)]
        block_offsets = [40 + k for k in range(10)]
        row_positions = [
            start + offset
            for start, offset in itertools.product(block_starts, block_offsets)
        ]

        sample_rows = iris.iloc[row_positions[:-1]]
        sample_features = sample_rows.iloc[:, 1:]

        response = predictor.predict(sample_features.values)
        print(response.decode("utf-8"))
def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):
    """Run a two-job hyperparameter tuning job on a Marketplace algorithm.

    ``max_leaf_nodes`` is first set to a static value on the estimator and
    then tuned over the integer range 1..100000 against the
    ``validation:accuracy`` objective.

    Args:
        sagemaker_session: session used by the estimator and the upload.
        cpu_instance_type: instance type used for the training jobs.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")

    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    algo_arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region), region=region, account=account_id
    )

    estimator = AlgorithmEstimator(
        algorithm_arn=algo_arn,
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    train_input = estimator.sagemaker_session.upload_data(
        path=training_dir, key_prefix="integ-test-data/marketplace/train"
    )

    estimator.set_hyperparameters(max_leaf_nodes=10)

    tuning_ranges = {"max_leaf_nodes": IntegerParameter(1, 100000)}
    tuner = HyperparameterTuner(
        estimator=estimator,
        base_tuning_job_name="byo",
        objective_metric_name="validation:accuracy",
        hyperparameter_ranges=tuning_ranges,
        max_jobs=2,
        max_parallel_jobs=2,
    )

    tuner.fit({"training": train_input}, include_cls_metadata=False)
    # Short pause before blocking on the job
    # (presumably to let the tuning job register -- confirm).
    time.sleep(15)
    tuner.wait()
Ejemplo n.º 9
0
def _add_role_to_policy(kms_client,
                        account_id,
                        role_arn,
                        region,
                        alias=KEY_ALIAS,
                        sagemaker_role="SageMakerRole"):
    """Ensure the key policy's principal covers both role_arn and sagemaker_role.

    The current policy of the key behind ``alias`` is fetched and the AWS
    principal of its first statement is inspected. If either ``role_arn`` or
    ``sagemaker_role`` is not found in it, the policy is re-rendered from
    ``KEY_POLICY`` with a principal built from ``PRINCIPAL_TEMPLATE`` and
    written back. Otherwise nothing is changed.

    Args:
        kms_client: client exposing ``get_key_policy`` and ``put_key_policy``.
        account_id: account id substituted into the principal template.
        role_arn: role ARN that must be present in the principal.
        region: region used to resolve the partition for the principal.
        alias: key alias passed to ``_get_kms_key_id``.
        sagemaker_role: role name that must be present in the principal.
    """
    key_id = _get_kms_key_id(kms_client, alias)
    policy_response = kms_client.get_key_policy(KeyId=key_id,
                                                PolicyName=POLICY_NAME)
    current_policy = json.loads(policy_response["Policy"])
    current_principal = current_policy["Statement"][0]["Principal"]["AWS"]

    # Nothing to do when both identities are already in the principal.
    if role_arn in current_principal and sagemaker_role in current_principal:
        return

    principal = PRINCIPAL_TEMPLATE.format(
        partition=utils._aws_partition(region),
        account_id=account_id,
        role_arn=role_arn,
        sagemaker_role=sagemaker_role,
    )

    kms_client.put_key_policy(
        KeyId=key_id,
        PolicyName=POLICY_NAME,
        # Render with the same fields as _create_kms_key: a template that
        # references {sagemaker_role} would otherwise raise KeyError, and
        # str.format ignores keyword arguments the template does not use.
        Policy=KEY_POLICY.format(id=POLICY_NAME,
                                 principal=principal,
                                 sagemaker_role=sagemaker_role),
    )