Beispiel #1
0
def test_marketplace_transform_job_from_model_package(sagemaker_session,
                                                      cpu_instance_type):
    """Run a batch transform job from a Marketplace model package.

    Reads the iris training CSV (no header row), drops the column labelled 0,
    writes the result into the transform work directory, uploads that
    directory through the session, and then runs a single-instance transform
    job on ``cpu_instance_type`` and waits for it to complete.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")
    features = pandas.read_csv(training_dir + "/iris.csv", header=None)
    features = features.drop(columns=[0])

    workdir = DATA_DIR + "/marketplace/transform"
    features.to_csv(workdir + "/batchtransform_test.csv",
                    index=False,
                    header=False)
    s3_input = sagemaker_session.upload_data(
        workdir,
        key_prefix="integ-test-data/marketplace/transform",
    )

    # The model package ARN depends on the region the session targets.
    aws_region = sagemaker_session.boto_region_name
    owner_account = REGION_ACCOUNT_MAP[aws_region]
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=_aws_partition(aws_region),
        region=aws_region,
        account=owner_account,
    )

    model_package = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    batch_transformer = model_package.transformer(
        instance_count=1,
        instance_type=cpu_instance_type,
    )
    batch_transformer.transform(s3_input, content_type="text/csv")
    batch_transformer.wait()
def test_marketplace_transform_job_from_model_package(sagemaker_session):
    """Run a batch transform job from a Marketplace model package.

    Reads the iris training CSV (no header row), drops the column labelled 0,
    writes the result into the transform work directory, uploads that
    directory through the session, and then runs a single 'ml.m4.xlarge'
    transform job and waits for it to complete.
    """
    training_dir = os.path.join(DATA_DIR, 'marketplace', 'training')
    features = pandas.read_csv(training_dir + '/iris.csv', header=None)
    features = features.drop(columns=[0])

    workdir = DATA_DIR + '/marketplace/transform'
    features.to_csv(workdir + '/batchtransform_test.csv',
                    index=False,
                    header=False)
    s3_input = sagemaker_session.upload_data(
        workdir, key_prefix='integ-test-data/marketplace/transform')

    # The ARN template takes the session's region as its only substitution.
    package_arn = MODEL_PACKAGE_ARN % sagemaker_session.boto_region_name
    model_package = ModelPackage(
        role='SageMakerRole',
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    batch_transformer = model_package.transformer(1, 'ml.m4.xlarge')
    batch_transformer.transform(s3_input, content_type='text/csv')
    batch_transformer.wait()
Beispiel #3
0
def cli():
    """
    Run the cross-sell recommendation pipeline from the command line.

    Simply run this script with '--help' to see all options.

    The pipeline performs these steps in order:

    1. Parse the required ``--input`` and ``--output`` arguments.
    2. Load settings from ``configs/config.json``.
    3. Download data from Woocommerce to the input path and upload it to S3.
    4. Create a ModelPackage from the configured model package ARN and run a
       batch transform job over the uploaded file, waiting for it to finish.
    5. Download the ``<input file name>.out`` result from S3 to the output
       path and push it to Woocommerce.

    Raises:
        SystemExit: raised by argparse when a required argument is missing.
        KeyError: if an expected key is absent from the loaded configuration.
    """
    # S3 URIs and object keys always use "/" as separator. os.path.join would
    # insert the host separator (a backslash on Windows) and produce invalid
    # S3 locations, so the POSIX flavour is used explicitly.
    import posixpath

    desc = "Pipeline to train a AWS Marketplace model on Woocommerce data and upload cross-sell product recommendations"

    logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s",
                        level=logging.INFO)

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument("-i",
                        "--input",
                        help="input path",
                        dest="input",
                        required=True)

    parser.add_argument("-o",
                        "--output",
                        help="output_path",
                        dest="output",
                        required=True)

    args = parser.parse_args()

    logging.info("configure services")
    config = Configuration("configs/config.json").parameters
    s3_config = config["s3"]

    s3 = S3(
        region_name=s3_config["region_name"],
        access_key=config["aws_keys"]["access_key"],
        secret_key=config["aws_keys"]["secret_key"],
        bucket=s3_config["bucket"],
    )

    woocommerce = Woocommerce(
        url=config["woocommerce"]["url"],
        consumer_key=config["woocommerce"]["consumer_key"],
        consumer_secret=config["woocommerce"]["consumer_secret"],
    )

    logging.info("download data from Woocommerce")
    woocommerce.download_data(args.input)

    logging.info("upload data to S3")
    s3.upload(args.input, s3_config["input_file"])

    logging.info("configure Batch Transform Job")
    model_package_arn = config["batch_transform_job"]["model_package_arn"]

    iam = boto3.client("iam")
    role = iam.get_role(
        RoleName=config["batch_transform_job"]["role_name"])["Role"]["Arn"]

    sagemaker_session = sagemaker.Session()

    model = ModelPackage(
        model_package_arn=model_package_arn,
        role=role,
        sagemaker_session=sagemaker_session,
    )

    output_path = "s3://" + posixpath.join(s3_config["bucket"], "output")

    transformer = model.transformer(
        instance_count=config["batch_transform_job"]["instance_count"],
        instance_type=config["batch_transform_job"]["instance_type"],
        output_path=output_path,
    )

    input_path = "s3://" + posixpath.join(s3_config["bucket"],
                                          s3_config["input_file"])

    transformer.transform(input_path, content_type="text/csv")

    logging.info('batch transform job starting')
    transformer.wait()

    logging.info("downloading recommendations from S3")
    # "s3://<bucket>/<folder>" split on "/" is ["s3:", "", <bucket>, <folder>],
    # so index 3 is the first path component after the bucket name.
    bucket_folder = transformer.output_path.split("/")[3]
    # The result object is named after the input file's base name plus ".out".
    key = posixpath.join(bucket_folder,
                         s3_config["input_file"].split("/")[-1] + ".out")

    s3.download(key, args.output)

    logging.info("Push recommendations to Woocommerce")
    woocommerce.upload_recommendations(args.output)