def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):
    """Start a two-job hyperparameter tuning run on the marketplace algorithm and wait for it.

    Args:
        sagemaker_session: session used to resolve the region, upload the
            training data and run the jobs.
        cpu_instance_type: instance type used for the training jobs.
    """
    region = sagemaker_session.boto_region_name
    algorithm_arn = ALGORITHM_ARN % (region, REGION_ACCOUNT_MAP[region])
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")

    estimator = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    s3_train_data = estimator.sagemaker_session.upload_data(
        path=training_dir,
        key_prefix="integ-test-data/marketplace/train",
    )

    # Static hyperparameter value set on the estimator before the tuner is built.
    estimator.set_hyperparameters(max_leaf_nodes=10)

    tuner = HyperparameterTuner(
        estimator=estimator,
        base_tuning_job_name="byo",
        objective_metric_name="validation:accuracy",
        hyperparameter_ranges={"max_leaf_nodes": IntegerParameter(1, 100000)},
        max_jobs=2,
        max_parallel_jobs=2,
    )
    tuner.fit({"training": s3_train_data}, include_cls_metadata=False)

    # Pause before blocking on completion
    # (presumably lets the tuning job register first; TODO confirm).
    time.sleep(15)
    tuner.wait()
def test_marketplace_transform_job(sagemaker_session, cpu_instance_type):
    """Train the marketplace algorithm, then run a batch transform job with it.

    The transform input is derived from the training CSV by dropping column 0
    and is written to the local transform work directory before upload.

    Args:
        sagemaker_session: session used to resolve the region, upload data
            and run the jobs.
        cpu_instance_type: instance type used for both training and transform.
    """
    data_path = os.path.join(DATA_DIR, "marketplace", "training")
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region), region=region, account=account
    )

    algo = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    train_input = algo.sagemaker_session.upload_data(
        path=data_path, key_prefix="integ-test-data/marketplace/train"
    )

    # Build the transform payload: the training CSV without column 0
    # (presumably the label column; confirm against the dataset).
    shape = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None).drop(
        [0], axis=1
    )

    # Paths are assembled with os.path.join rather than "/" concatenation so
    # they match how data_path is built above.
    transform_workdir = os.path.join(DATA_DIR, "marketplace", "transform")
    shape.to_csv(
        os.path.join(transform_workdir, "batchtransform_test.csv"),
        index=False,
        header=False,
    )
    transform_input = algo.sagemaker_session.upload_data(
        transform_workdir, key_prefix="integ-test-data/marketplace/transform"
    )

    algo.fit({"training": train_input})

    transformer = algo.transformer(1, cpu_instance_type)
    transformer.transform(transform_input, content_type="text/csv")
    transformer.wait()
def test_marketplace_estimator(sagemaker_session):
    """Train the marketplace algorithm, deploy it to an endpoint and print a prediction.

    Args:
        sagemaker_session: session used to resolve the region, upload data,
            train and deploy.
    """
    with timeout(minutes=15):
        region = sagemaker_session.boto_region_name
        algorithm_arn = ALGORITHM_ARN % (region, REGION_ACCOUNT_MAP[region])
        data_path = os.path.join(DATA_DIR, "marketplace", "training")

        estimator = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        s3_train_data = estimator.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )
        estimator.fit({"training": s3_train_data})

    endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        predictor = estimator.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name)

        frame = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)

        # Rows 40-49 of each 50-row group starting at 0, 50 and 100.
        row_ids = [start + offset for start in (0, 50, 100) for offset in range(40, 50)]

        # Drop the final selected row, then keep every column except the first.
        features = frame.iloc[row_ids[:-1]].iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))
def test_marketplace_transform_job(sagemaker_session):
    """Train the marketplace algorithm, then run a batch transform job with it.

    The transform input is derived from the training CSV by dropping column 0
    and is written to the local transform work directory before upload.

    Args:
        sagemaker_session: session used to resolve the region, upload data
            and run the jobs.
    """
    data_path = os.path.join(DATA_DIR, 'marketplace', 'training')
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN % (region, account)

    algo = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        base_job_name='test-marketplace')

    train_input = algo.sagemaker_session.upload_data(
        path=data_path, key_prefix='integ-test-data/marketplace/train')

    # Build the transform payload: the training CSV without column 0
    # (presumably the label column; confirm against the dataset).
    shape = pandas.read_csv(
        os.path.join(data_path, 'iris.csv'), header=None).drop([0], axis=1)

    # Paths are assembled with os.path.join rather than '/' concatenation so
    # they match how data_path is built above.
    transform_workdir = os.path.join(DATA_DIR, 'marketplace', 'transform')
    shape.to_csv(
        os.path.join(transform_workdir, 'batchtransform_test.csv'),
        index=False, header=False)
    transform_input = algo.sagemaker_session.upload_data(
        transform_workdir,
        key_prefix='integ-test-data/marketplace/transform')

    algo.fit({'training': train_input})

    transformer = algo.transformer(1, 'ml.m4.xlarge')
    transformer.transform(transform_input, content_type='text/csv')
    transformer.wait()
def test_marketplace_tuning_job(sagemaker_session):
    """Start a two-job hyperparameter tuning run on the marketplace algorithm and wait for it.

    Args:
        sagemaker_session: session used to resolve the region, upload the
            training data and run the jobs.
    """
    region = sagemaker_session.boto_region_name
    algorithm_arn = ALGORITHM_ARN % (region, REGION_ACCOUNT_MAP[region])
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")

    estimator_kwargs = {
        "algorithm_arn": algorithm_arn,
        "role": "SageMakerRole",
        "train_instance_count": 1,
        "train_instance_type": "ml.c4.xlarge",
        "sagemaker_session": sagemaker_session,
        "base_job_name": "test-marketplace",
    }
    estimator = AlgorithmEstimator(**estimator_kwargs)

    s3_train_data = estimator.sagemaker_session.upload_data(
        path=training_dir,
        key_prefix="integ-test-data/marketplace/train",
    )

    # Static hyperparameter value set on the estimator before the tuner is built.
    estimator.set_hyperparameters(max_leaf_nodes=10)
    search_space = {"max_leaf_nodes": IntegerParameter(1, 100000)}

    tuner = HyperparameterTuner(
        estimator=estimator,
        base_tuning_job_name="byo",
        objective_metric_name="validation:accuracy",
        hyperparameter_ranges=search_space,
        max_jobs=2,
        max_parallel_jobs=2,
    )
    tuner.fit({"training": s3_train_data}, include_cls_metadata=False)

    # Pause before blocking on completion
    # (presumably lets the tuning job register first; TODO confirm).
    time.sleep(15)
    tuner.wait()
def test_marketplace_attach(sagemaker_session, cpu_instance_type):
    """Start training without waiting, re-attach to the job by name, then deploy and predict.

    Args:
        sagemaker_session: session used to resolve the region, upload data,
            train, attach and deploy.
        cpu_instance_type: instance type used for training and for the endpoint.
    """
    with timeout(minutes=15):
        region = sagemaker_session.boto_region_name
        algorithm_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=REGION_ACCOUNT_MAP[region],
        )
        data_path = os.path.join(DATA_DIR, "marketplace", "training")

        launcher = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            base_job_name="test-marketplace",
        )

        s3_train_data = launcher.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )

        # Fire-and-forget: the job is picked up again below via attach().
        launcher.fit({"training": s3_train_data}, wait=False)
        training_job_name = launcher.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)
        endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        print("Re-attaching now to: %s" % training_job_name)
        attached = AlgorithmEstimator.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        predictor = attached.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=sagemaker.predictor.csv_serializer,
        )

        frame = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)

        # Rows 40-49 of each 50-row group starting at 0, 50 and 100.
        row_ids = [start + offset for start in (0, 50, 100) for offset in range(40, 50)]

        # Drop the final selected row, then keep every column except the first.
        features = frame.iloc[row_ids[:-1]].iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))
def test_marketplace_attach(sagemaker_session):
    """Start training without waiting, re-attach to the job by name, then deploy and predict.

    Args:
        sagemaker_session: session used to resolve the region, upload data,
            train, attach and deploy.
    """
    with timeout(minutes=15):
        region = sagemaker_session.boto_region_name
        algorithm_arn = ALGORITHM_ARN % (region, REGION_ACCOUNT_MAP[region])
        data_path = os.path.join(DATA_DIR, "marketplace", "training")

        launcher = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
            base_job_name="test-marketplace",
        )

        s3_train_data = launcher.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )

        # Fire-and-forget: the job is picked up again below via attach().
        launcher.fit({"training": s3_train_data}, wait=False)
        training_job_name = launcher.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)
        endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        print("Re-attaching now to: %s" % training_job_name)
        attached = AlgorithmEstimator.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        predictor = attached.deploy(
            1,
            "ml.m4.xlarge",
            endpoint_name=endpoint_name,
            serializer=sagemaker.predictor.csv_serializer,
        )

        frame = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)

        # Rows 40-49 of each 50-row group starting at 0, 50 and 100.
        row_ids = [start + offset for start in (0, 50, 100) for offset in range(40, 50)]

        # Drop the final selected row, then keep every column except the first.
        features = frame.iloc[row_ids[:-1]].iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))
def test_marketplace_estimator(sagemaker_session, cpu_instance_type):
    """Train the marketplace algorithm, deploy it to an endpoint and print a prediction.

    Args:
        sagemaker_session: session used to resolve the region, upload data,
            train and deploy.
        cpu_instance_type: instance type used for training and for the endpoint.
    """
    with timeout(minutes=15):
        region = sagemaker_session.boto_region_name
        algorithm_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region),
            region=region,
            account=REGION_ACCOUNT_MAP[region],
        )
        data_path = os.path.join(DATA_DIR, "marketplace", "training")

        estimator = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        s3_train_data = estimator.sagemaker_session.upload_data(
            path=data_path,
            key_prefix="integ-test-data/marketplace/train",
        )
        estimator.fit({"training": s3_train_data})

    endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        predictor = estimator.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        frame = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)

        # Rows 40-49 of each 50-row group starting at 0, 50 and 100.
        row_ids = [start + offset for start in (0, 50, 100) for offset in range(40, 50)]

        # Drop the final selected row, then keep every column except the first.
        features = frame.iloc[row_ids[:-1]].iloc[:, 1:]

        response = predictor.predict(features.values)
        print(response.decode("utf-8"))