import pytest
from unittest.mock import Mock

from sagemaker import get_execution_role


def test_get_execution_role_throws_exception_if_arn_is_not_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = 'arn:aws:iam::369233609183:user/marcos'
    with pytest.raises(ValueError) as error:
        get_execution_role(session)
    assert 'ValueError: The current AWS identity is not a role' in str(error)
def test_get_execution_role_works_with_service_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = \
        'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'
    actual = get_execution_role(session)
    assert actual == 'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'
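# Taken together, the two tests above assume that get_execution_role() resolves the
# caller identity ARN from the session and rejects identities that are not roles.
# A minimal sketch of that behaviour (hypothetical; not the SDK's actual source):
def _get_execution_role_sketch(sagemaker_session):
    arn = sagemaker_session.get_caller_identity_arn()
    # Plain roles and service-roles both contain ':role/' in their ARN.
    if ':role/' in arn:
        return arn
    # Users, federated identities, etc. cannot serve as an execution role.
    raise ValueError('The current AWS identity is not a role: {}'.format(arn))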
import os
import json
from pathlib import Path

import boto3
import sagemaker


def get_current_folder(global_variables):
    # if calling from a file
    if "__file__" in global_variables:
        current_file = Path(global_variables["__file__"])
        current_folder = current_file.parent.resolve()
    # if calling from a notebook
    else:
        current_folder = Path(os.getcwd())
    return current_folder


default_bucket = sagemaker.session.Session(boto3.session.Session()).default_bucket()
default_role = sagemaker.get_execution_role()

cfn_stack_outputs = {}
current_folder = get_current_folder(globals())
cfn_stack_outputs_filepath = Path(current_folder, '../stack_outputs.json').resolve()
if os.path.exists(cfn_stack_outputs_filepath):
    with open(cfn_stack_outputs_filepath) as f:
        cfn_stack_outputs = json.load(f)

solution_prefix = cfn_stack_outputs.get('SolutionPrefix', 'sm-soln-pred-maint')
solution_bucket = cfn_stack_outputs.get('SolutionS3Bucket', default_bucket)
s3_prefix = cfn_stack_outputs.get('SolutionS3Prefix', 'pred-maintenance-artifacts')
training_job_name = cfn_stack_outputs.get('SageMakerTrainingJobName', 'sm-soln-pred-maint-model')
import time

t1 = time.time()

# Split training, validation, and test
ys = np.array(df['price']).astype("float32")
xs = np.array(df['sqft_living']).astype("float32").reshape(-1, 1)

np.random.seed(8675309)
train_features, test_features, train_labels, test_labels = train_test_split(
    xs, ys, test_size=0.2)
val_features, test_features, val_labels, test_labels = train_test_split(
    test_features, test_labels, test_size=0.5)

# Train model
linear_model = sagemaker.LinearLearner(role=sagemaker.get_execution_role(),
                                       instance_count=1,
                                       instance_type='ml.m4.xlarge',
                                       predictor_type='regressor')

train_records = linear_model.record_set(train_features, train_labels, channel='train')
val_records = linear_model.record_set(val_features, val_labels, channel='validation')
test_records = linear_model.record_set(test_features, test_labels, channel='test')

linear_model.fit([train_records, val_records, test_records], logs=False)
# Replace **`<LabBucketName>`** with the resource name that was provided with your lab account.

# In[21]:

import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import image_uris
from sagemaker.amazon.amazon_estimator import RecordSet

# Instantiate an Amazon SageMaker session
sess = sagemaker.Session()

# Get the Amazon SageMaker role
role = get_execution_role()

# Bucket name
bucket = 'ml-pipeline-bucket'

# Get the image URI for the container that includes the linear learner algorithm
container = image_uris.retrieve('linear-learner', boto3.Session().region_name)

print(f'Session {sess}')
print(f'The role is {role}')
print(f'The container is {container} in the {boto3.Session().region_name} region')

# In[22]:
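# The cell above only resolves the session, role, bucket, and container URI. A hedged
# sketch of how these are typically combined into an Estimator in a following cell
# (instance type, output path, and hyperparameters are assumptions, not the lab's values):
linear = sagemaker.estimator.Estimator(
    container,                      # linear-learner image URI retrieved above
    role,                           # execution role from get_execution_role()
    instance_count=1,
    instance_type='ml.m4.xlarge',   # assumed instance type
    output_path=f's3://{bucket}/linear-learner/output',
    sagemaker_session=sess)
linear.set_hyperparameters(predictor_type='regressor',  # assumed; the lab may use a classifier
                           mini_batch_size=100)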
def test_get_execution_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = 'arn:aws:iam::369233609183:role/SageMakerRole'
    actual = get_execution_role(session)
    assert actual == 'arn:aws:iam::369233609183:role/SageMakerRole'
def setup_workflow(project, purpose, workflow_execution_role, script_dir, ecr_repository):
    """Set up everything needed for a Step Functions workflow with a SageMaker processing job.

    Args:
        project: project name under sagemaker
        purpose: subproject
        workflow_execution_role: ARN used to execute the step functions
        script_dir: processing file name, e.g. a .py file
        ecr_repository: ECR repository name

    Returns:
        workflow: a stepfunctions.workflow.Workflow instance

    Example:
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """

    # SageMaker Session setup
    # ========================================================================================
    # SageMaker Session
    # ====================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    # Storage
    # ====================================
    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ==================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3CodePath = 's3://{}/{}/code'.format(s3_output, s3_prefix)

    ## preprocess & prediction scripts
    script_list = [script_dir]
    for script in script_list:
        session.upload_data(script,
                            bucket=session.default_bucket(),
                            key_prefix=s3_prefix_code)

    # ECR environment
    # ====================================
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region, uri_suffix,
                                                      ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # Set up the script processor
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================
    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3CodePath,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=["python3", "/opt/ml/processing/input/code/" + script_dir],
    )

    # Fail State
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")
    catch_state_processing = steps.states.Catch(error_equals=["States.TaskFailed"],
                                                next_step=failed_state)

    # Create Workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])
    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=workflow_execution_role)
    workflow.create()
    return workflow
def role(sagemaker_session):
    return get_execution_role(sagemaker_session)
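# The one-liner above reads like a pytest fixture that resolves the execution role once
# and injects it into tests. A hedged sketch of how such a fixture might be declared and
# consumed (the fixture decorator, scope, and test body are assumptions):
import pytest
from sagemaker import get_execution_role


@pytest.fixture(scope='session')
def role(sagemaker_session):
    # Resolve the execution role once per test session and share it across tests.
    return get_execution_role(sagemaker_session)


def test_role_is_an_arn(role):
    # Hypothetical consumer: any estimator construction needs a role ARN.
    assert role.startswith('arn:')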
print(f"Length of train_labels is: {train_labels.shape}") print(f"Length of val_features is: {val_features.shape}") print(f"Length of val_labels is: {val_labels.shape}") print(f"Length of test_features is: {test_features.shape}") print(f"Length of test_labels is: {test_labels.shape}") # Now, call the Amazon SageMaker `LinearLearner()` algorithm. This example uses an `ml.m4.xlarge` instance for training. `predictor_type` is set to __'binary_classifier'__ because there are two classes: "spam" and "not spam". # In[7]: import sagemaker # Call the LinearLearner estimator object binary_estimator = sagemaker.LinearLearner(role=sagemaker.get_execution_role(), instance_count=1, instance_type='ml.m4.xlarge', predictor_type='binary_classifier') # Use the `record_set()` function of the binary_estimator to set the training, validation, and test parts of the estimator. # In[8]: train_records = binary_estimator.record_set(train_features, train_labels, channel='train') val_records = binary_estimator.record_set(val_features, val_labels, channel='validation') test_records = binary_estimator.record_set(test_features, test_labels, channel='test')
def get_credentials():
    """Retrieve and return sagemaker credentials: session, role, S3 bucket"""
    sage_session = sagemaker.Session()
    role = sagemaker.get_execution_role()
    bucket = sage_session.default_bucket()
    return sage_session, role, bucket
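# A minimal usage sketch for the helper above (the local file path and S3 key prefix
# are assumptions, not values from the original project):
sage_session, role, bucket = get_credentials()

# Stage a local file in the default bucket so a training job can read it.
s3_uri = sage_session.upload_data('data/train.csv', bucket=bucket, key_prefix='demo/input')
print(f'Execution role: {role}')
print(f'Uploaded training data to: {s3_uri}')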
def build_model(model_name, app, desc):
    # role
    common_prefix = "DEMO-gluoncv-model-zoo"
    training_input_prefix = common_prefix + "/training-input-data"
    role = get_execution_role()
    sess = sage.Session()

    # create estimator
    account = sess.boto_session.client('sts').get_caller_identity()['Account']
    region = sess.boto_session.region_name
    image = '{}.dkr.ecr.{}.amazonaws.com/gluoncv-{}:latest'.format(
        account, region, app.replace('_', '-'))

    TRAINING_WORKDIR = "data/training"
    training_input = sess.upload_data(TRAINING_WORKDIR, key_prefix=training_input_prefix)
    # print("Training Data Location " + training_input)

    classifier = sage.estimator.Estimator(
        image,
        role,
        1,
        'ml.c4.xlarge',
        output_path="s3://{}/output".format(sess.default_bucket()),
        sagemaker_session=sess,
        hyperparameters={'model_name': model_name})
    classifier.fit(training_input)

    TRANSFORM_WORKDIR = "data/transform"
    batch_inference_input_prefix = common_prefix + "/batch-inference-input-data"
    transform_input = sess.upload_data(
        TRANSFORM_WORKDIR, key_prefix=batch_inference_input_prefix) + "/cat1.jpg"

    # deploy
    if args.deploy_test:
        model = classifier.create_model()
        predictor = classifier.deploy(1, 'ml.m4.xlarge')
        with open('data/transform/cat1.jpg', 'rb') as f:
            x = f.read()
        # print(predictor.predict(x, initial_args={'ContentType': 'image/jpeg'}).decode('utf-8'))
        sess.delete_endpoint(predictor.endpoint)

    smmp = boto3.client(
        'sagemaker',
        region_name=region,
        endpoint_url="https://sagemaker.{}.amazonaws.com".format(region))

    modelpackage_inference_specification = InferenceSpecification(
    ).get_inference_specification_dict(
        ecr_image=image,
        supports_gpu=True,
        supported_content_types=["image/jpeg", "image/png"],
        supported_mime_types=["text/plain", "application/json"])

    # Specify the model data resulting from the previously completed training job
    modelpackage_inference_specification["InferenceSpecification"][
        "Containers"][0]["ModelDataUrl"] = classifier.model_data

    # validation specification
    modelpackage_validation_specification = ModelPackageValidationSpecification(
    ).get_validation_specification_dict(
        validation_role=role,
        batch_transform_input=transform_input,
        content_type="image/jpeg",
        instance_type="ml.c4.xlarge",
        output_s3_location='s3://{}/{}'.format(sess.default_bucket(), common_prefix))

    model_package_name = "gluoncv-{}-".format(
        model_name.replace('_', '-').replace('.', '-')) + str(round(time.time()))
    create_model_package_input_dict = {
        "ModelPackageName": model_package_name,
        "ModelPackageDescription": model_name + desc,
        "CertifyForMarketplace": True
    }
    create_model_package_input_dict.update(modelpackage_inference_specification)
    create_model_package_input_dict.update(modelpackage_validation_specification)

    smmp.create_model_package(**create_model_package_input_dict)

    while True:
        response = smmp.describe_model_package(ModelPackageName=model_package_name)
        status = response["ModelPackageStatus"]
        # print(model_name, ':', status)
        if status in ("Completed", "Failed"):
            # print(response["ModelPackageStatusDetails"])
            break
        time.sleep(5)
parser.add_argument('--max-parallel-jobs', type=int, default=3,
                    help='Maximum parallel jobs')
parser.add_argument('--wait', dest='wait', action='store_true')
parser.add_argument('--no-wait', dest='wait', action='store_false')
parser.set_defaults(wait=True)
args = parser.parse_args()

sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker/pytorch-cifar10'

# If you are running this outside of a sagemaker notebook, you must set SAGEMAKER_ROLE
role = os.getenv('SAGEMAKER_ROLE') or sagemaker.get_execution_role()

wandb.sagemaker_auth(path="source")

# Ensure training data is stored in s3
inputs = "s3://{}/{}".format(bucket, prefix)
try:
    file = os.path.join(prefix, 'cifar-10-python.tar.gz')
    sagemaker_session.boto_session.resource('s3').Object(bucket, file).load()
except botocore.exceptions.ClientError as e:
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10('data', download=True,
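# The comment above notes that get_execution_role() only works inside a SageMaker-managed
# environment. A hedged sketch of supplying the role explicitly when running locally
# (the role ARN below is a placeholder, not a real account's role):
import os

# Export the ARN of an IAM role that SageMaker is allowed to assume, for example:
#   export SAGEMAKER_ROLE=arn:aws:iam::123456789012:role/MySageMakerExecutionRole
os.environ.setdefault('SAGEMAKER_ROLE',
                      'arn:aws:iam::123456789012:role/MySageMakerExecutionRole')  # placeholder
role = os.getenv('SAGEMAKER_ROLE')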
import os

import boto3
from torchvision import datasets, transforms

import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

DIR_PATH = os.path.dirname(os.path.realpath(__file__))

if __name__ == '__main__':
    boto_session = boto3.Session(profile_name="packt-sagemaker")
    sagemaker_session = sagemaker.Session(boto_session=boto_session)
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/DEMO-pytorch-fashion-mnist'
    role = sagemaker.get_execution_role(sagemaker_session)

    datasets.FashionMNIST(
        'data',
        download=True,
        transform=transforms.Compose([
            # Convert a PIL Image or numpy.ndarray to a tensor; default range [0, 255] -> [0.0, 1.0].
            transforms.ToTensor()
        ]))

    inputs = sagemaker_session.upload_data(path='data', bucket=bucket, key_prefix=prefix)
    print('input spec (in this case, just an S3 path): {}'.format(inputs))

    entry_point_path = os.path.join(DIR_PATH, "fashion_mnist.py")
def train(source_dir,
          data_path='doodle/data',
          training_steps=20000,
          evaluation_steps=2000,
          train_instance_type='local',
          train_instance_count=1,
          run_tensorboard_locally=True,
          uid=None,
          role=None,
          bucket=None,
          profile_name=None):
    assert os.path.exists(source_dir)

    boto_session = boto3.Session(profile_name=profile_name)
    session = sagemaker.Session(boto_session=boto_session)

    role = role if role is not None else sagemaker.get_execution_role()
    bucket = bucket if bucket is not None else session.default_bucket()
    uid = uid if uid is not None else uuid4()

    logger.debug(session.get_caller_identity_arn())
    role = session.expand_role(role)

    params = {
        'train_tfrecord_file': 'train.tfr',
        'test_tfrecord_file': 'test.tfr',
        'samples_per_epoch': 700000,
        'save_summary_steps': 100,
    }

    output_path = 's3://{}/doodle/model/{}/export'.format(bucket, uid)
    checkpoint_path = 's3://{}/doodle/model/{}/ckpt'.format(bucket, uid)
    code_location = 's3://{}/doodle/model/{}/source'.format(bucket, uid)
    base_job_name = 'doodle-training-job-{}'.format(uid)
    data_dir = 's3://{}/{}'.format(bucket, data_path)

    logger.info('uid : {}'.format(uid))
    logger.info('execution_role : {}'.format(role))
    logger.info('data_dir : {}'.format(data_dir))
    logger.info('output_path : {}'.format(output_path))
    logger.info('checkpoint_path : {}'.format(checkpoint_path))
    logger.info('code_location : {}'.format(code_location))
    logger.info('base_job_name : {}'.format(base_job_name))
    logger.info('training_steps : {}'.format(training_steps))
    logger.info('evaluation_steps : {}'.format(evaluation_steps))
    logger.info('train_instance_count : {}'.format(train_instance_count))
    logger.info('train_instance_type : {}'.format(train_instance_type))
    logger.info('hyperparameters : {}'.format(json.dumps(params)))

    estimator = TensorFlow(hyperparameters=params,
                           output_path=output_path,
                           checkpoint_path=checkpoint_path,
                           code_location=code_location,
                           base_job_name=base_job_name,
                           source_dir=source_dir,
                           entry_point='doodle.py',
                           framework_version='1.6',
                           role=role,
                           training_steps=training_steps,
                           evaluation_steps=evaluation_steps,
                           train_instance_count=train_instance_count,
                           train_instance_type=train_instance_type)
    estimator.fit(data_dir, run_tensorboard_locally=run_tensorboard_locally)
# 70% of the data for training & 30% of the data for validation

# Code-1 ---------------------------------------------------------------------------------------------------------------
from sagemaker import get_execution_role

# Bucket location to save your custom code in tar.gz format.
custom_code_upload_location = 's3://sagemaker-02122018/customcode/tensorflow_iris'

# Bucket location where results of model training are saved.
model_artifacts_location = 's3://sagemaker-02122018/artifacts'

# IAM execution role that gives SageMaker access to resources in your AWS account.
# The get_execution_role function retrieves the IAM role you created at the time of creating your notebook instance.
role = get_execution_role()

# Code-2 ---------------------------------------------------------------------------------------------------------------
!cat "iris_dnn_classifier.py"

# Code-3 ---------------------------------------------------------------------------------------------------------------
from sagemaker.tensorflow import TensorFlow  # Importing the TensorFlow estimator class from sagemaker

iris_estimator = TensorFlow(entry_point='iris_dnn_classifier.py',  # Creating the estimator from the Python script
                            role=role,  # IAM role
                            framework_version='1.11.0',
                            output_path=model_artifacts_location,
                            code_location=custom_code_upload_location,
                            train_instance_count=1,  # number of instances
                            train_instance_type='ml.c4.xlarge',
                            training_steps=1000,
def _get_default_role():
    # Prefer an explicitly configured role override; only fall back to the
    # notebook's execution role when no override is set, so the override still
    # works outside a SageMaker-managed environment.
    if 'partial_role' in _defaults:
        return _defaults['partial_role']
    return get_execution_role()
sm = boto3.Session().client(service_name='sagemaker', region_name=region)
role = get_execution_role()

# Start the training job
sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=[{
                          'DataSource': {
                              'S3DataSource': {
                                  'S3DataType': 'S3Prefix',
                                  'S3Uri': s3_data_dir_path
                              }
                          },
                          'TargetAttributeName': target_column_name
                      }],
                      OutputDataConfig={'S3OutputPath': s3_data_output_path},
                      RoleArn=role)

# Poll the training job status
while True:
    resp = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    if resp['AutoMLJobStatus'] == 'Completed':
        break
    time.sleep(10)

# Create a model from the best candidate
best_candidate = resp['BestCandidate']
model_name = auto_ml_job_name + '-model'
model_arn = sm.create_model(Containers=best_candidate['InferenceContainers'],
                            ModelName=model_name,
                            ExecutionRoleArn=role)
def train_deploy_chat_bert():
    role = sagemaker.get_execution_role()
    session = sagemaker.Session()

    # Location for train.csv, val.csv and labels.csv
    DATA_PATH = Path("../data/")

    # Location for storing training_config.json
    CONFIG_PATH = DATA_PATH/'config'
    CONFIG_PATH.mkdir(exist_ok=True)

    # S3 bucket name
    bucket = 'sagemaker-deep-learning-bert-v2'

    # Prefixes on the S3 bucket for input and output
    prefix = 'sagemaker-deep-learning-bert-v2/input'
    prefix_output = 'sagemaker-deep-learning-bert-v2/output'

    hyperparameters = dict(epochs=10,
                           lr=8e-5,
                           max_seq_length=512,
                           train_batch_size=16,
                           lr_schedule="warmup_cosine",
                           warmup_steps=1000,
                           optimizer_type="adamw")

    training_config = dict(run_text="text-classification-bert",
                           finetuned_model=None,
                           do_lower_case="True",
                           train_file="train.csv",
                           val_file="val.csv",
                           label_file="labels.csv",
                           text_col="message_body",
                           label_col='["abusive","asking_exchange","normal","offline_sell","possible_fraud","sharing_contact_details"]',
                           multi_label="True",
                           grad_accumulation_steps="1",
                           fp16_opt_level="O1",
                           fp16="True",
                           model_type="roberta",
                           model_name="roberta-base",
                           logging_steps="300")

    with open(CONFIG_PATH/'training_config.json', 'w') as f:
        json.dump(training_config, f)

    # Upload the data to the S3 bucket
    s3_input = session.upload_data(DATA_PATH, bucket=bucket, key_prefix=prefix)
    session.upload_data(str(DATA_PATH/'labels.csv'), bucket=bucket, key_prefix=prefix)
    session.upload_data(str(DATA_PATH/'train.csv'), bucket=bucket, key_prefix=prefix)
    session.upload_data(str(DATA_PATH/'val.csv'), bucket=bucket, key_prefix=prefix)

    # Create an Estimator and start training
    account = session.boto_session.client('sts').get_caller_identity()['Account']
    region = session.boto_session.region_name
    image = "{}.dkr.ecr.{}.amazonaws.com/fluent-sagemaker-fast-bert:1.0-gpu-py36".format(account, region)
    output_path = "s3://{}/{}".format(bucket, prefix_output)

    estimator = sagemaker.estimator.Estimator(image,
                                              role,
                                              train_instance_count=1,
                                              train_instance_type='ml.p2.xlarge',
                                              output_path=output_path,
                                              base_job_name='bert-text-classification-v1',
                                              hyperparameters=hyperparameters,
                                              sagemaker_session=session)
    estimator.fit(s3_input)

    # Deploy the model to the hosting service
    predictor = estimator.deploy(1,
                                 'ml.m5.large',
                                 endpoint_name='bert-text-classification-v1',
                                 serializer=json_serializer)
    'servable_model_dir': '/opt/ml/model',
    'loss_weight': 1.0,
    'use_context': True,
    'max_click_history': 30,
    'num_epochs': 1,
    'max_title_length': 16,
    'entity_dim': 128,
    'word_dim': 300,
    'batch_size': 128,
    'perform_shuffle': 1,
    'checkpointPath': '/opt/ml/checkpoints'
}

byoc_est = sagemaker.estimator.Estimator(
    '662566784674.dkr.ecr.ap-northeast-1.amazonaws.com/gw-dkn:20201114025113',
    role=sagemaker.get_execution_role(),
    train_instance_count=1,
    train_instance_type=train_instance_type,
    base_job_name='dkn-byoc',
    hyperparameters=hyperparameters)

train_s3 = "s3://leigh-gw/train.csv/"
test_s3 = "s3://leigh-gw/test.csv/"
inputs = {'train': train_s3, 'eval': test_s3}

train_config = training_config(estimator=byoc_est, inputs=inputs)


# step - trigger CDK to deploy model as ECS service using Airflow Python Operator
def dkn_model_deploy(data, **context):
    print("mock for dkn deployment")
def __init__(self, aws_profile, aws_region):
    self.boto_session = boto3.Session(profile_name=aws_profile, region_name=aws_region)
    self.sagemaker_session = sage.Session(boto_session=self.boto_session)
    self.role = sage.get_execution_role(self.sagemaker_session)
import s3fs
import sagemaker
from sagemaker import get_execution_role

#%% [markdown]
# Start by specifying:
# - The Amazon S3 bucket and prefix that you want to use for the training and model data. This should be within the same region as the notebook, training, and hosting instances. Replace `'FILL_IN_LAB_BUCKET_NAME'` below with the name of the lab's S3 bucket; this can be found in Qwiklabs along the left-hand side, under *LabBucket*. Just copy and paste that bucket name into the appropriate location in the code cell below before running the cell.
# - The IAM role ARN is used to give training and hosting access to your data. See the documentation for how to create these. Here, the `get_execution_role` function obtains the role ARN, which was specified when creating the notebook.

#%%
bucket = 'FILL_IN_LAB_BUCKET_NAME'
prefix = 'demos/deepar/forecast-electricity'

assert bucket != 'FILL_IN_LAB_BUCKET_NAME', 'Student needs to set bucket to match the lab S3 bucket name.'

sagemaker_session = sagemaker.Session()
role = get_execution_role()

s3_data_path = "{}/{}/data".format(bucket, prefix)
s3_output_path = "{}/{}/output".format(bucket, prefix)

print('Data location: %s' % s3_data_path)
print('Output location: %s' % s3_output_path)

#%% [markdown]
# # Real Data Set: Electricity Consumption <a name="data"></a>
#
# **Dataset License and Information:**
#
# For this lab, you will be using an open source dataset entitled ["Individual Household Electric Power Consumption"](https://archive.ics.uci.edu/ml/datasets/Individual+household+electric+power+consumption) that comes from the UCI Machine Learning Repository. Information about the dataset license can be found below.
#
# The MIT License (MIT) Copyright © [2017] Zalando SE, [https://tech.zalando.com](https://tech.zalando.com).
#
"""Creates a FLow Definition ARN for A2I""" import boto3 import io import json import uuid import botocore import time import botocore from sagemaker import get_execution_role ROLE = get_execution_role() REGION = "" WORKTEAM_ARN = "" HUMAN_TASK_ARN = "" BUCKET = "" OUTPUT_PATH = f's3://{BUCKET}/a2i_results' def create_flow_definition(flow_definition_name, human_task_arn, task_title): ''' Creates Flow Definition resource. Returns FlowDefinitionArn ''' # Amazon SageMaker client SAGEMAKER_CLIENT = boto3.client('sagemaker', REGION) # A2I Runtime client A2I_RUNTIME_CLIENT = boto3.client('sagemaker-a2i-runtime', REGION) response = SAGEMAKER_CLIENT.create_flow_definition( FlowDefinitionName=flow_definition_name,
""" runs script train_val_test_split on an ec2 instance as a sagemaker processing job """ from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput import sagemaker import boto3 ## locations and vars BUCKET = sagemaker.Session().default_bucket() INPUT_FOLDER = 'stock-data-raw-csv' OUTPUT_FOLDER = 'DEMO-xgboost-as-a-built-in-algo' ROLE_ARN = sagemaker.get_execution_role() ## image uri code ACCOUNT_ID = boto3.client('sts').get_caller_identity().get('Account') REGION = boto3.Session().region_name ECR_REPOSITORY = 'sagemaker-processing-container' TAG = ':latest' IMAGE_URI = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(ACCOUNT_ID, REGION, ECR_REPOSITORY + TAG) ## call processing job script_processor = ScriptProcessor(command=['python3'], image_uri=IMAGE_URI, role=ROLE_ARN, instance_count=1, instance_type='ml.m5.xlarge') script_processor.run( code='train_val_test_split.py', inputs=[ ProcessingInput(source=f's3://{BUCKET}/{INPUT_FOLDER}/', destination='/opt/ml/processing/input') ],
# Environment
import sagemaker

bucket = sagemaker.Session().default_bucket()  # we are using a default bucket here but you can change it to any bucket in your account
prefix = 'sagemaker/DEMO-hpo-tensorflow-high'  # you can customize the prefix (subfolder) here
role = sagemaker.get_execution_role()  # we are using the notebook instance role for training in this example

import boto3
from time import gmtime, strftime
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Download the MNIST dataset
import utils
from tensorflow.contrib.learn.python.learn.datasets import mnist
import tensorflow as tf

data_sets = mnist.read_data_sets('data', dtype=tf.uint8, reshape=False, validation_size=5000)
utils.convert_to(data_sets.train, 'train', 'data')
utils.convert_to(data_sets.validation, 'validation', 'data')
utils.convert_to(data_sets.test, 'test', 'data')

# Upload the data
inputs = sagemaker.Session().upload_data(path='data', bucket=bucket, key_prefix=prefix + '/data/mnist')
print(inputs)

# Script for distributed training
def main():
    # Define some configuration from the environment
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', None)
    AWS_PROFILE = os.getenv('AWS_PROFILE', None)
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    # MLOps specific
    PIPELINE_NAME = os.getenv('PIPELINE_NAME', 'stsPipeline')
    MODEL_PACKAGE_GROUP_NAME = os.getenv('MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    outputs = {'pipeline': None, 'baseline': None, 'train': None}

    try:
        # Define the ML pipeline for training
        pipe = get_pipeline(AWS_DEFAULT_REGION,
                            sm_session,
                            role=ROLE_ARN,
                            pipeline_name=PIPELINE_NAME,
                            model_package_group_name=MODEL_PACKAGE_GROUP_NAME,
                            base_job_prefix=BASE_JOB_PREFIX)

        # Output debug information
        parsed = json.loads(pipe.definition())
        outputs['pipeline'] = parsed
        _l.debug('ML Pipeline definition')
        _l.debug(json.dumps(parsed, indent=2, sort_keys=True))

        # Create/update the SageMaker Pipeline
        upsert_response = pipe.upsert(role_arn=ROLE_ARN)
        _l.debug(f"C/U SageMaker Pipeline: Response received: {upsert_response}")

        _l.info("Starting the SageMaker pipeline")
        execution = pipe.start()
        _l.info("Waiting for the pipeline")
        execution.wait()
        _l.info("Pipeline finished: !!!")
        _l.debug(f"{pprint.pformat(execution.list_steps())}")

        # Take the S3 URI of the baseline dataset baseline.csv
        mse_step = extract_step_from_list(parsed.get('Steps'), 'CheckMSESTSEvaluation')
        mon_step = extract_step_from_list(
            mse_step.get('Arguments').get('IfSteps'), 'SetupMonitoringData')
        outputs['baseline'] = get_outputs(mon_step)

        # Take the S3 URIs of the train, validate, and test datasets
        train_step_def = extract_step_from_list(parsed.get('Steps'), 'PreprocessSTSData')
        outputs['train'] = get_outputs(train_step_def)
        # --

        # Write the pipeline definition and the selected outputs to a JSON file
        with open('trainmodel_out.json', 'w') as f:
            json.dump(outputs, f)
        # ---
    except Exception as e:
        _l.exception(f"Exception: {e}")