Exemple #1
0
def test_marketplace_transform_job_from_model_package(sagemaker_session,
                                                      cpu_instance_type):
    """Run a batch transform job built from a marketplace model package.

    Reads ``iris.csv`` from the marketplace training data, drops column 0,
    writes the remaining columns (no index, no header) into the transform
    work directory, uploads that directory to S3, then starts a transform
    job on one ``cpu_instance_type`` instance and waits for it.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")
    iris = pandas.read_csv(training_dir + "/iris.csv", header=None)
    payload = iris.drop([0], axis=1)

    transform_workdir = DATA_DIR + "/marketplace/transform"
    payload.to_csv(
        transform_workdir + "/batchtransform_test.csv",
        index=False,
        header=False,
    )
    # The whole work directory is uploaded, not just the CSV written above.
    transform_input = sagemaker_session.upload_data(
        transform_workdir,
        key_prefix="integ-test-data/marketplace/transform",
    )

    # The model package ARN is assembled from the session's region.
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    partition = _aws_partition(region)
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=partition,
        region=region,
        account=account,
    )

    package_model = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    batch_job = package_model.transformer(1, cpu_instance_type)
    batch_job.transform(transform_input, content_type="text/csv")
    batch_job.wait()
Exemple #2
0
def test_marketplace_model(sagemaker_session, cpu_instance_type):
    """Deploy a marketplace model package and print one prediction.

    The model is deployed to a timestamp-named endpoint on a single
    ``cpu_instance_type`` instance; the endpoint is handled by
    ``timeout_and_delete_endpoint_by_name`` with a 20 minute limit.
    A subset of rows from ``iris.csv`` (all columns but the first) is sent
    to the endpoint and the UTF-8 decoded response is printed.
    """
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    package_arn = MODEL_PACKAGE_ARN % (region, account)

    def csv_predictor(endpoint, session):
        # Predictor factory handed to ModelPackage; requests are CSV-serialized.
        return sagemaker.RealTimePredictor(
            endpoint,
            session,
            serializer=sagemaker.predictor.csv_serializer,
        )

    package_model = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
        predictor_cls=csv_predictor,
    )

    suffix = sagemaker_timestamp()
    endpoint_name = "test-marketplace-model-endpoint{}".format(suffix)
    with timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session, minutes=20):
        predictor = package_model.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name)

        training_dir = os.path.join(DATA_DIR, "marketplace", "training")
        iris = pandas.read_csv(
            os.path.join(training_dir, "iris.csv"), header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        block_starts = [50 * k for k in range(3)]
        block_offsets = [40 + k for k in range(10)]
        row_positions = [
            start + offset
            for start in block_starts
            for offset in block_offsets
        ]

        # The last position (149) is left out; column 0 is not sent.
        sample_rows = iris.iloc[row_positions[:-1]]
        request_frame = sample_rows.iloc[:, 1:]

        response = predictor.predict(request_frame.values)
        print(response.decode("utf-8"))
Exemple #3
0
def test_marketplace_model(sagemaker_session):
    """Deploy a marketplace model package and print one prediction.

    The model is deployed to a timestamp-named endpoint on a single
    ``ml.m4.xlarge`` instance; the endpoint is handled by
    ``timeout_and_delete_endpoint_by_name`` with a 20 minute limit.
    A subset of rows from ``iris.csv`` (all columns but the first) is sent
    to the endpoint and the UTF-8 decoded response is printed.
    """
    def csv_predictor(endpoint, session):
        # Predictor factory handed to ModelPackage; requests are CSV-serialized.
        return sagemaker.RealTimePredictor(
            endpoint,
            session,
            serializer=sagemaker.predictor.csv_serializer,
        )

    # The ARN template takes only the session's region here.
    package_arn = MODEL_PACKAGE_ARN % sagemaker_session.boto_region_name
    package_model = ModelPackage(
        role='SageMakerRole',
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
        predictor_cls=csv_predictor,
    )

    suffix = sagemaker_timestamp()
    endpoint_name = 'test-marketplace-model-endpoint{}'.format(suffix)
    with timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session, minutes=20):
        predictor = package_model.deploy(
            1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        training_dir = os.path.join(DATA_DIR, 'marketplace', 'training')
        iris = pandas.read_csv(
            os.path.join(training_dir, 'iris.csv'), header=None)

        # Row positions 40-49, 90-99 and 140-149, in that order.
        block_starts = [50 * k for k in range(3)]
        block_offsets = [40 + k for k in range(10)]
        row_positions = [
            start + offset
            for start in block_starts
            for offset in block_offsets
        ]

        # The last position (149) is left out; column 0 is not sent.
        sample_rows = iris.iloc[row_positions[:-1]]
        request_frame = sample_rows.iloc[:, 1:]

        response = predictor.predict(request_frame.values)
        print(response.decode('utf-8'))
def test_marketplace_transform_job_from_model_package(sagemaker_session):
    """Run a batch transform job built from a marketplace model package.

    Reads ``iris.csv`` from the marketplace training data, drops column 0,
    writes the remaining columns (no index, no header) into the transform
    work directory, uploads that directory to S3, then starts a transform
    job on one ``ml.m4.xlarge`` instance and waits for it.
    """
    training_dir = os.path.join(DATA_DIR, 'marketplace', 'training')
    iris = pandas.read_csv(training_dir + '/iris.csv', header=None)
    payload = iris.drop([0], axis=1)

    transform_workdir = DATA_DIR + '/marketplace/transform'
    payload.to_csv(
        transform_workdir + '/batchtransform_test.csv',
        index=False,
        header=False,
    )
    # The whole work directory is uploaded, not just the CSV written above.
    transform_input = sagemaker_session.upload_data(
        transform_workdir,
        key_prefix='integ-test-data/marketplace/transform',
    )

    # The ARN template takes only the session's region here.
    package_arn = MODEL_PACKAGE_ARN % sagemaker_session.boto_region_name
    package_model = ModelPackage(
        role='SageMakerRole',
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    batch_job = package_model.transformer(1, 'ml.m4.xlarge')
    batch_job.transform(transform_input, content_type='text/csv')
    batch_job.wait()
                               )
        # Update the existing Endpoint
        create_endpoint_api_response = sagemaker_boto_client.update_endpoint(
                            EndpointName=args.endpoint_name,
                            EndpointConfigName=ep_config_name
                        )

        create_config('Y')
    except ClientError as error: 
        # endpoint does not exist
        if "Could not find endpoint" in error.response['Error']['Message']: 
            model_package_approved = get_approved_package(args.model_package_group_name)
            model_package_arn = model_package_approved["ModelPackageArn"]

            model = ModelPackage(role=args.role, 
                                 model_package_arn=model_package_arn, 
                                 sagemaker_session=sagemaker_session)
            try:
                model.deploy(initial_instance_count=args.initial_instance_count, 
                             instance_type=args.endpoint_instance_type,
                             endpoint_name=args.endpoint_name)
                create_config('Y')
            except ClientError as error:
                print(error.response['Error']['Message'])
                create_config('N')
                error_message = error.response["Error"]["Message"]
                LOGGER.error("{}".format(stacktrace))
                raise Exception(error_message)
        else:
            print(error.response['Error']['Message'])
            create_config('N')
Exemple #6
0
def cli():
    """
    Run the cross-sell recommendation pipeline from the command line.

    Steps, in order:

    1. Parse the required ``-i/--input`` and ``-o/--output`` paths.
    2. Load settings from ``configs/config.json``.
    3. Download data from Woocommerce to the input path and upload it to S3.
    4. Run a SageMaker batch transform job with the configured model
       package and wait for it to finish.
    5. Download the ``<input file name>.out`` result from S3 to the output
       path and push it to Woocommerce.

    Simply run this script with '--help' to see all options.
    """
    # S3 URIs and object keys always use "/" as separator, so they are built
    # with posixpath rather than os.path (which uses "\\" on Windows).
    import posixpath

    desc = "Pipeline to train a AWS Marketplace model on Woocommerce data and upload cross-sell product recommendations"

    logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s",
                        level=logging.INFO)

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument("-i",
                        "--input",
                        help="input path",
                        dest="input",
                        required=True)

    parser.add_argument("-o",
                        "--output",
                        help="output_path",
                        dest="output",
                        required=True)

    args = parser.parse_args()

    logging.info("configure services")
    config = Configuration("configs/config.json").parameters
    s3_config = config["s3"]

    s3 = S3(
        region_name=s3_config["region_name"],
        access_key=config["aws_keys"]["access_key"],
        secret_key=config["aws_keys"]["secret_key"],
        bucket=s3_config["bucket"],
    )

    woocommerce = Woocommerce(
        url=config["woocommerce"]["url"],
        consumer_key=config["woocommerce"]["consumer_key"],
        consumer_secret=config["woocommerce"]["consumer_secret"],
    )

    logging.info("download data from Woocommerce")
    woocommerce.download_data(args.input)

    logging.info("upload data to S3")
    s3.upload(args.input, s3_config["input_file"])

    logging.info("configure Batch Transform Job")
    job_config = config["batch_transform_job"]
    model_package_arn = job_config["model_package_arn"]

    # Resolve the configured IAM role name to its ARN.
    iam = boto3.client("iam")
    role = iam.get_role(RoleName=job_config["role_name"])["Role"]["Arn"]

    sagemaker_session = sagemaker.Session()

    model = ModelPackage(
        model_package_arn=model_package_arn,
        role=role,
        sagemaker_session=sagemaker_session,
    )

    output_path = "s3://" + posixpath.join(s3_config["bucket"], "output")

    transformer = model.transformer(
        instance_count=job_config["instance_count"],
        instance_type=job_config["instance_type"],
        output_path=output_path,
    )

    input_path = "s3://" + posixpath.join(s3_config["bucket"],
                                          s3_config["input_file"])

    transformer.transform(input_path, content_type="text/csv")

    logging.info('batch transform job starting')
    transformer.wait()

    logging.info("downloading recommendations from S3")
    # "s3://<bucket>/output".split("/") is ["s3:", "", "<bucket>", "output"],
    # so index 3 is the first key component under the bucket.
    # NOTE(review): assumes transformer.output_path is the output_path passed
    # to model.transformer() above -- confirm against the SageMaker SDK.
    bucket_folder = transformer.output_path.split("/")[3]
    result_name = posixpath.basename(s3_config["input_file"]) + ".out"
    key = posixpath.join(bucket_folder, result_name)

    s3.download(key, args.output)

    logging.info("Push recommendations to Woocommerce")
    woocommerce.upload_recommendations(args.output)