def main():
    """Deploy a pre-trained PyTorch model to a local-mode endpoint and run inference.

    The model archive is referenced by its S3 URI, served with ``inference.py``
    on a ``'local'`` instance, exercised through
    ``do_inference_on_local_endpoint`` and finally torn down.
    """
    test_loader = download_data_for_inference()

    # NOTE(review): this session is configured but never passed to
    # PyTorchModel below -- confirm whether ``sagemaker_session=`` was intended.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-script-mode-local-model-inference/model.tar.gz'

    model = PyTorchModel(
        role=role,
        model_data=model_dir,
        framework_version='1.8',
        py_version='py3',
        entry_point='inference.py'
    )

    print('Deploying endpoint in local mode')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    try:
        do_inference_on_local_endpoint(predictor, test_loader)
    finally:
        # Always tear the endpoint down, even when inference fails.
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name; the predictor already knows which endpoint it is bound to.
        predictor.delete_endpoint()
def main():
    """Deploy a TensorFlow model to a local-mode endpoint and request predictions.

    The request payload names an S3 bucket and object; the result returned by
    the endpoint is printed. The endpoint and the model are deleted afterwards.
    """
    # NOTE(review): this session is configured but never passed to
    # TensorFlowModel below -- confirm whether ``sagemaker_session=`` was intended.
    session = LocalSession()
    session.config = {'local': {'local_code': True}}

    role = DUMMY_IAM_ROLE
    model_dir = 's3://tensorflow-script-mode-local-model-inference/model.tar.gz'

    model = TensorFlowModel(
        entry_point='inference.py',
        source_dir='./code',
        role=role,
        model_data=model_dir,
        framework_version='2.3.0',
    )

    print('Deploying endpoint in local mode')
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    dummy_inputs = {
        'bucket_name': 'tensorflow-script-mode-local-model-inference',
        'object_name': 'instances.json'
    }

    try:
        predictions = predictor.predict(dummy_inputs)
        print("predictions: {}".format(predictions))
    finally:
        # Clean up even when the prediction request fails.
        print('About to delete the endpoint')
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name, so the name must not be passed positionally.
        predictor.delete_endpoint()
        predictor.delete_model()
def main():
    """Serve a local PyTorch model archive in local mode and send one JSON request.

    The model is loaded from ``./model/model.tar.gz`` with code from ``code/``,
    the predictor is switched to JSON (de)serialization, a single Hebrew
    sentence is sent for prediction, and the endpoint is deleted afterwards.
    """
    # NOTE(review): this session is configured but never passed to
    # PyTorchModel below -- confirm whether ``sagemaker_session=`` was intended.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Deploying local mode endpoint')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    pytorch_model = PyTorchModel(model_data='./model/model.tar.gz',
                                 role=role,
                                 framework_version="1.7.1",
                                 source_dir="code",
                                 py_version="py3",
                                 entry_point="inference.py")

    predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='local')

    try:
        predictor.serializer = sagemaker.serializers.JSONSerializer()
        predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

        result = predictor.predict("אני אוהב לעבוד באמזון")
        print('result: {}'.format(result))
    finally:
        # Always tear the endpoint down, even when the request fails.
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name; the predictor already knows which endpoint it is bound to.
        predictor.delete_endpoint()
def main():
    """Train the CIFAR-10 PyTorch script in local mode, deploy it and run inference.

    Training reads from ``file://./data/`` for a single epoch. The trained
    estimator is deployed to a ``'local'`` endpoint, exercised through
    ``do_inference_on_local_endpoint`` and then the endpoint and model are
    deleted.
    """
    testloader = download_training_data()

    # NOTE(review): this session is configured but never passed to the
    # estimator below -- confirm whether ``sagemaker_session=`` was intended.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Starting model training')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )
    cifar10_estimator = PyTorch(entry_point='cifar10_pytorch.py',
                                source_dir='./code',
                                role=role,
                                framework_version='1.7.1',
                                py_version='py3',
                                instance_count=1,
                                instance_type='local',
                                hyperparameters={
                                    'epochs': 1,
                                })

    cifar10_estimator.fit('file://./data/')

    print('Deploying local mode endpoint')
    predictor = cifar10_estimator.deploy(initial_instance_count=1, instance_type='local')

    try:
        do_inference_on_local_endpoint(predictor, testloader)
    finally:
        # Clean up even when inference fails.
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name; the predictor already knows which endpoint it is bound to.
        predictor.delete_endpoint()
        predictor.delete_model()
def main():
    """Train the MNIST TensorFlow 2 script in local mode, deploy it and run inference.

    Training reads from ``file://./data/`` with the parameter-server
    distribution enabled. The trained estimator is deployed to a ``'local'``
    endpoint, exercised through ``do_inference_on_local_endpoint`` and then
    the endpoint and model are deleted.
    """
    download_training_and_eval_data()

    # NOTE(review): this session is configured but never passed to the
    # estimator below -- confirm whether ``sagemaker_session=`` was intended.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Starting model training')
    mnist_estimator = TensorFlow(
        entry_point='mnist_tf2.py',
        role=role,
        instance_count=1,
        instance_type='local',
        framework_version='2.3.0',
        py_version='py37',
        distribution={'parameter_server': {
            'enabled': True
        }})

    mnist_estimator.fit("file://./data/")

    print('Deploying local mode endpoint')
    predictor = mnist_estimator.deploy(initial_instance_count=1, instance_type='local')

    try:
        do_inference_on_local_endpoint(predictor)
    finally:
        # Clean up even when inference fails.
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name; the predictor already knows which endpoint it is bound to.
        predictor.delete_endpoint()
        predictor.delete_model()
def __init__(self, transform_job_name, model_name, local_session=None):
    """Create a local transform job record in the ``_CREATING`` state.

    Args:
        transform_job_name: Name stored on the job as ``self.name``.
        model_name: Name of the model whose ``PrimaryContainer`` description
            is looked up through the local SageMaker client.
        local_session: Session to use; when falsy a new ``LocalSession`` is
            created.
    """
    from sagemaker.local import LocalSession

    self.local_session = local_session or LocalSession()
    sm_client = self.local_session.sagemaker_client

    self.name = transform_job_name
    self.model_name = model_name

    # TODO - support SageMaker Models not just local models. This is not
    # ideal but it may be a good thing to do.
    model_description = sm_client.describe_model(model_name)
    self.primary_container = model_description["PrimaryContainer"]

    # Everything below is unset until the job is configured elsewhere.
    self.container = None
    self.start_time = self.end_time = None
    self.batch_strategy = None
    self.transform_resources = None
    self.input_data = self.output_data = None
    self.environment = {}
    self.state = _LocalTransformJob._CREATING
def __init__(self, endpoint_name, endpoint_config_name, tags=None, local_session=None):
    """Create a local endpoint record in the ``_CREATING`` state.

    Args:
        endpoint_name: Name stored on the endpoint as ``self.name``.
        endpoint_config_name: Name of the endpoint config to describe; its
            first production variant selects the model to serve.
        tags: Stored unchanged on ``self.tags``.
        local_session: Session to use; when falsy a new ``LocalSession`` is
            created.
    """
    # Imported at call time because entities and local_session depend on
    # each other cyclically.
    from sagemaker.local import LocalSession

    self.local_session = local_session or LocalSession()
    sm_client = self.local_session.sagemaker_client

    self.name = endpoint_name
    self.endpoint_config = sm_client.describe_endpoint_config(endpoint_config_name)
    variants = self.endpoint_config["ProductionVariants"]
    self.production_variant = variants[0]
    self.tags = tags

    model_description = sm_client.describe_model(self.production_variant["ModelName"])
    self.primary_container = model_description["PrimaryContainer"]

    self.container = None
    self.create_time = None
    self.state = _LocalEndpoint._CREATING
def __init__(self, role, train_instance_count, train_instance_type,
             train_volume_size=30, train_max_run=24 * 60 * 60, input_mode='File',
             output_path=None, output_kms_key=None, base_job_name=None,
             sagemaker_session=None):
    """Initialize an ``EstimatorBase`` instance.

    Args:
        role (str): AWS IAM role, given as a name or a full ARN. Amazon
            SageMaker training jobs and the APIs that create endpoints use it
            to reach training data and model artifacts; once the endpoint
            exists, inference code may use it to access other AWS resources.
        train_instance_count (int): How many Amazon EC2 instances to train on.
        train_instance_type (str): EC2 instance type used for training, e.g.
            'ml.c4.xlarge'.
        train_volume_size (int): Size in GB of the EBS volume that holds
            input data during training (default: 30). It has to be large
            enough for the training data when File mode (the default) is used.
        train_max_run (int): Training timeout in seconds
            (default: 24 * 60 * 60). Amazon SageMaker terminates the job once
            it elapses, whatever the job's status.
        input_mode (str): Input mode supported by the algorithm
            (default: 'File'). Valid modes:
            'File' - Amazon SageMaker copies the training dataset from the S3
            location to a local directory.
            'Pipe' - Amazon SageMaker streams data directly from S3 to the
            container via a Unix-named pipe.
        output_path (str): S3 location where the training result (model
            artifacts and output files) is saved. When omitted, a default
            bucket is used. If the named bucket does not exist, the estimator
            creates it while
            :meth:`~sagemaker.estimator.EstimatorBase.fit` runs.
        output_kms_key (str): Optional. KMS key ID used to encrypt the
            training output (default: None).
        base_job_name (str): Prefix for the training job name used when
            :meth:`~sagemaker.estimator.EstimatorBase.fit` launches. When
            omitted, the estimator derives a default name from the training
            image name and the current timestamp.
        sagemaker_session (sagemaker.session.Session): Session object that
            manages interactions with Amazon SageMaker APIs and any other AWS
            services needed. When omitted, the estimator creates one using
            the default AWS configuration chain.

    Raises:
        RuntimeError: If ``train_instance_type`` is 'local_gpu' and more than
            one instance is requested.
    """
    self.role = role
    self.train_instance_count = train_instance_count
    self.train_instance_type = train_instance_type
    self.train_volume_size = train_volume_size
    self.train_max_run = train_max_run
    self.input_mode = input_mode

    runs_locally = self.train_instance_type in ('local', 'local_gpu')
    if runs_locally:
        multi_instance_gpu = (self.train_instance_type == 'local_gpu'
                              and self.train_instance_count > 1)
        if multi_instance_gpu:
            raise RuntimeError(
                "Distributed Training in Local GPU is not supported")

    # A caller-supplied session always wins; otherwise the default session
    # type follows the instance type.
    if sagemaker_session:
        self.sagemaker_session = sagemaker_session
    elif runs_locally:
        self.sagemaker_session = LocalSession()
    else:
        self.sagemaker_session = Session()

    self.base_job_name = base_job_name
    self._current_job_name = None
    self.output_path = output_path
    self.output_kms_key = output_kms_key
    self.latest_training_job = None
def get_config():
    """Return the configuration dictionary for local-mode training.

    Returns:
        dict: The local session (with ``local_code`` enabled), the ``'local'``
        instance type, the ``file://`` dataset path and the dummy IAM role.
        The S3 prefix and bucket entries are ``None``.
    """
    print('Will run training locally in a container image.')

    local_session = LocalSession()
    local_session.config = {'local': {'local_code': True}}

    return {
        's3_data_prefix': None,  # not needed in local training
        'sagemaker_session': local_session,
        'bucket': None,  # not needed in local training
        'instance_type': 'local',
        'training_dataset_path': "file://./data/",
        'role': DUMMY_IAM_ROLE,  # not needed in local training
    }
def main():
    """Deploy a TensorFlow model to a local-mode endpoint and request predictions.

    The request payload names an S3 bucket and object; the result returned by
    the endpoint is printed and the endpoint is deleted afterwards.
    """
    # NOTE(review): this session is configured but never passed to
    # TensorFlowModel below -- confirm whether ``sagemaker_session=`` was intended.
    session = LocalSession()
    session.config = {'local': {'local_code': True}}

    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/tensorflow-script-mode-local-model-inference/model.tar.gz'

    model = TensorFlowModel(
        entry_point='inference.py',
        source_dir='./code',
        role=role,
        model_data=model_dir,
        framework_version='2.3.0',
    )

    print('Deploying endpoint in local mode')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )
    print('Endpoint deployed in local mode')

    dummy_inputs = {
        'bucket_name': 'aws-ml-blog',
        'object_name': 'artifacts/tensorflow-script-mode-local-model-inference/instances.json'
    }

    try:
        predictions = predictor.predict(dummy_inputs)
        print("predictions: {}".format(predictions))
    finally:
        # Clean up even when the prediction request fails.
        print('About to delete the endpoint')
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name, so the name must not be passed positionally.
        predictor.delete_endpoint()
def get_config(mode):
    """Build the training configuration for cloud or local mode.

    Args:
        mode: Either ``CLOUD_MODE`` or ``LOCAL_MODE``.

    Returns:
        dict: Keys ``'mode'``, ``'s3_data_prefix'``, ``'sagemaker_session'``,
        ``'bucket'``, ``'instance_type'``, ``'training_dataset_path'`` and
        ``'role'``.

    Raises:
        AssertionError: If ``mode`` is not one of the two known modes, or if
            cloud mode is selected while the role is still the dummy role.
    """
    # Compare by value, not identity: a mode that equals one of the constants
    # but is a different object (e.g. a string built at runtime) is valid.
    assert mode in (CLOUD_MODE, LOCAL_MODE), f'unknown mode selected: {mode}'

    if mode == CLOUD_MODE:
        ## REPLACE WITH A VALID IAM ROLE - START ##
        role = DUMMY_IAM_ROLE
        ## REPLACE WITH A VALID IAM ROLE - END ##

        # Value comparison so that a copy of the dummy ARN is rejected too.
        assert role != DUMMY_IAM_ROLE, "For cloud mode set a valid sagemaker iam role"

        print('Will run training on an ML instance in AWS.')
        session = sagemaker.Session()
        bucket = session.default_bucket()
        s3_data_prefix = 'tensorflow_script_mode_cloud_training/mnist/'
        instance_type = 'ml.m5.large'
        training_dataset_path = 's3://' + bucket + '/' + s3_data_prefix
    else:  # mode == LOCAL_MODE
        print('Will run training locally in a container image.')
        session = LocalSession()
        session.config = {'local': {'local_code': True}}
        instance_type = 'local'
        training_dataset_path = "file://./data/"
        role = DUMMY_IAM_ROLE  # not needed in local training
        s3_data_prefix = None  # not needed in local training
        bucket = None  # not needed in local training

    config = {
        'mode': mode,
        's3_data_prefix': s3_data_prefix,
        'sagemaker_session': session,
        'bucket': bucket,
        'instance_type': instance_type,
        'training_dataset_path': training_dataset_path,
        'role': role
    }
    return config
def main():
    """Deploy a pre-trained PyTorch NLP model in local mode and predict on CSV data.

    Test rows are read from ``./data/test_data.csv``, sent to the local
    endpoint as CSV, the predictions are printed and the endpoint is deleted
    afterwards.
    """
    # NOTE(review): this session is configured but never passed to
    # PyTorchModel below -- confirm whether ``sagemaker_session=`` was intended.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-nlp-script-mode-local-model-inference/model.tar.gz'

    test_data = pd.read_csv('./data/test_data.csv', header=None)
    print(f'test_data: {test_data}')

    model = PyTorchModel(role=role,
                         model_data=model_dir,
                         framework_version='1.7.1',
                         source_dir='code',
                         py_version='py3',
                         entry_point='inference.py')

    print('Deploying endpoint in local mode')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    try:
        predictor.serializer = sagemaker.serializers.CSVSerializer()
        predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

        predictions = predictor.predict(test_data.to_csv(header=False, index=False))
        print(f'predictions: {predictions}')
    finally:
        # Always tear the endpoint down, even when the request fails.
        # delete_endpoint() takes an optional boolean flag, not the endpoint
        # name; the predictor already knows which endpoint it is bound to.
        predictor.delete_endpoint()
def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    A SageMaker ``Model`` and ``EndpointConfig`` are created and an
    ``Endpoint`` is deployed from this ``Model``. When ``self.predictor_cls``
    is set, the method returns the result of calling it with the created
    endpoint name and the session.

    After the call returns, the created model's name is available as
    ``self.name`` and the created endpoint's name as ``self.endpoint_name``.

    Args:
        initial_instance_count (int): The initial number of instances to run
            in the ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create
            (default: None). When omitted, the model name is used.
        tags: Passed through to the session when the endpoint is created
            (default: None).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
        ``self.predictor_cls`` on the created endpoint name, if
        ``self.predictor_cls`` is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        # No session supplied: pick the session type from the instance type.
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = LocalSession()
        else:
            self.sagemaker_session = Session()

    container_def = self.prepare_container_def(instance_type)
    self.name = self.name or name_from_image(container_def['Image'])
    self.sagemaker_session.create_model(
        self.name, self.role, container_def, vpc_config=self.vpc_config)

    variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count)
    self.endpoint_name = endpoint_name or self.name
    self.sagemaker_session.endpoint_from_production_variants(
        self.endpoint_name, [variant], tags)

    if not self.predictor_cls:
        return None
    return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
def __init__(
    self,
    project_name,
    boto3_session=None,
    role_name=None,
    bucket_name=None,
    smSession=None,
    local_mode=False,
    prefix=None,
):
    """Constructor.

    Stores the project settings and fills in defaults for whatever was not
    supplied: the role name is derived from the project name, a boto3
    session is created, a SageMaker session (local or regular, depending on
    ``local_mode``) is built on top of it, and the bucket falls back to the
    SageMaker session's default bucket.
    """
    self.project_name = project_name
    self.role_name = role_name or f"{constants.DEFAULT_IAM_ROLE}_{project_name}"
    self.role_created = False
    self.tasks = {}
    self.local_mode = local_mode
    self.prefix = prefix or ""
    self.defaultCodeParams = None

    self.boto3_session = boto3.Session() if boto3_session is None else boto3_session

    if smSession is not None:
        self.smSession = smSession
    elif local_mode:
        from sagemaker.local import LocalSession

        # The ``local_code`` config override is deliberately not applied here.
        self.smSession = LocalSession(boto_session=self.boto3_session)
    else:
        self.smSession = sagemaker.Session(boto_session=self.boto3_session)

    self.bucket_name = bucket_name or self.smSession.default_bucket()
import boto3 from sagemaker import get_execution_role from sagemaker.local import LocalSession # AWS profile for this session boto_sess = boto3.Session(profile_name='<profile_name>', region_name='<region_name>') # sage maker local session local_session = LocalSession(boto_sess) # S3 bucket name bucket = '<bucket_name>' # upload path for endpoint script. custom_code_upload_location = 's3://{}/customcode/tensorflow_cifar'.format( bucket) # the path saved artifacts (outputs of training) model_artifacts_location = 's3://{}/artifacts_cifar'.format(bucket) # IAM Role # For only local mode, it's not used but need to exits. role = '<role name>' from sagemaker.tensorflow import TensorFlow # TensorFlow Estimator estimator = TensorFlow( entry_point='cifar10_cnn.py', role=role,
def sagemaker_local_session(boto_session):
    """Return a ``LocalSession`` built on top of the given boto session."""
    local_session = LocalSession(boto_session=boto_session)
    return local_session
def main():
    """Launch a TensorFlow training job either on SageMaker ('sm') or locally ('local').

    Loads environment variables from ``../.env``, builds a boto3 session from
    the credentials returned by ``get_instance_cred()``, reads
    ``./MLP/config.yml`` and then, depending on ``args.mode``, constructs a
    ``TensorFlow`` estimator and calls ``fit`` on it.
    """
    #CWD=hydra.utils.get_original_cwd() # get the location the python script is run from
    args = parse()
    dotenv_path = os.path.join(os.getcwd(), '../.env')
    load_dotenv(dotenv_path)
    SAGEMAKER_IAM_ROLE = os.environ.get('SAGEMAKER_IAM_ROLE')
    creds = get_instance_cred()
    session = boto3.session.Session(
        aws_access_key_id=creds.access_key,
        aws_secret_access_key=creds.secret_key,
        aws_session_token=creds.token,
        region_name='ap-northeast-1',
    )
    client = session.client('sagemaker')
    sagemaker_session = Session(
        boto_session=session,
        sagemaker_client=client,
        #default_bucket='test-bucket'
    )
    dtnow = datetime.datetime.now()
    source_dir = os.path.join(os.getcwd(), 'MLP')
    with open('./MLP/config.yml', 'r') as yml:
        #config = yaml.load(yml)
        config = yaml.full_load(yml)
    # NOTE(review): input_data is computed but not used anywhere in this function.
    input_data = os.path.join(config['Model']['InputDir'], config["Name"], '*.csv.gz')
    pipe_dir = os.path.join(config['Model']['PipeDir'], config['Name'])
    output_path = os.path.join(config['Model']['OutputDir'], 'smtest', 'model')
    if args.mode == 'sm':
        estimator = TensorFlow(
            image_uri='',
            source_dir=source_dir,
            entry_point='train.py',
            output_path='s3://train', # output to S3
            code_location=
            's3:///source', # Where source_dir is stored. It is always copied to S3. Keeping it the same as output_path is a good idea.
            base_job_name=
            f'aladin-train-{datetime.datetime.strftime(dtnow, "%Y%m%d%H%M%S")}',
            #hyperparameters=hyperparameters,
            role=SAGEMAKER_IAM_ROLE,
            instance_count=1,
            #instance_type='ml.r5.8xlarge',
            instance_type='ml.p2.xlarge',
            sagemaker_session=sagemaker_session)
        estimator.fit({'train': 's3://test-bucket/'}) # S3 test
    elif args.mode == 'local': # local case
        estimator = TensorFlow(
            image_uri='',
            source_dir=source_dir,
            entry_point='train.py',
            #output_path='file:///home/ec2-user/out', # output to the local /home/ec2-user/...
            output_path=output_path, # output to the local /home/ec2-user/...
            code_location=
            's3://source', # Where source_dir is stored. It is always copied to S3. Keeping it the same as output_path is a good idea.
            base_job_name='traintest',
            #hyperparameters=hyperparameters,
            role=SAGEMAKER_IAM_ROLE,
            instance_count=1,
            instance_type='local',
            sagemaker_session=LocalSession(boto_session=session), # for local mode
        )
        # Starts a SageMaker training job and waits until completion.
        # NOTE(review): placed inside the 'local' branch because the trailing
        # comment says it is for local mode -- confirm against the original layout.
        estimator.fit({
            'train': f'file://{source_dir}/dataset.csv',
            'pipe': pipe_dir
        }) # For local mode, because going through S3 takes a very long time.
def __init__(
    self,
    role,
    image_uri,
    instance_count,
    instance_type,
    entrypoint=None,
    volume_size_in_gb=30,
    volume_kms_key=None,
    output_kms_key=None,
    max_runtime_in_seconds=None,
    base_job_name=None,
    sagemaker_session=None,
    env=None,
    tags=None,
    network_config=None,
):
    """Initializes a ``Processor`` instance.

    The ``Processor`` handles Amazon SageMaker Processing tasks.

    Args:
        role (str): AWS IAM role name or ARN that Amazon SageMaker Processing
            uses to access AWS resources, such as data stored in Amazon S3.
        image_uri (str): URI of the Docker image used for the processing jobs.
        instance_count (int): Number of instances a processing job runs with.
        instance_type (str): EC2 instance type used for processing, for
            example, 'ml.c4.xlarge'.
        entrypoint (list[str]): Entrypoint for the processing job, given as a
            list of strings that make up a command (default: None).
        volume_size_in_gb (int): Size in GB of the EBS volume used for storing
            data during processing (default: 30).
        volume_kms_key (str): KMS key for the processing volume
            (default: None).
        output_kms_key (str): KMS key ID for processing job outputs
            (default: None).
        max_runtime_in_seconds (int): Timeout in seconds (default: None).
            Once it elapses, Amazon SageMaker terminates the job regardless
            of its current status. When not specified, the default value is
            24 hours.
        base_job_name (str): Prefix for the processing job name. When not
            specified, the processor generates a default job name from the
            processing image name and the current timestamp.
        sagemaker_session (:class:`~sagemaker.session.Session`): Session
            object that manages interactions with Amazon SageMaker and any
            other AWS services needed. When not specified, the processor
            creates one using the default AWS configuration chain.
        env (dict[str, str]): Environment variables passed to the processing
            jobs (default: None).
        tags (list[dict]): Tags passed to the processing job (default: None).
            For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
        network_config (:class:`~sagemaker.network.NetworkConfig`):
            A :class:`~sagemaker.network.NetworkConfig` object that configures
            network isolation, encryption of inter-container traffic,
            security group IDs, and subnets.
    """
    # Job definition, stored as given.
    self.role = role
    self.image_uri = image_uri
    self.instance_count = instance_count
    self.instance_type = instance_type
    self.entrypoint = entrypoint
    self.volume_size_in_gb = volume_size_in_gb
    self.volume_kms_key = volume_kms_key
    self.output_kms_key = output_kms_key
    self.max_runtime_in_seconds = max_runtime_in_seconds
    self.base_job_name = base_job_name
    self.env = env
    self.tags = tags
    self.network_config = network_config

    # Bookkeeping that starts out empty.
    self.jobs = []
    self.latest_job = None
    self._current_job_name = None
    self.arguments = None

    # Local instance types require a LocalSession: anything else that was
    # passed in (including None) is replaced by a fresh one.
    local_instance = self.instance_type in ("local", "local_gpu")
    if local_instance and not isinstance(sagemaker_session, LocalSession):
        sagemaker_session = LocalSession()

    self.sagemaker_session = sagemaker_session or Session()
def sagemaker_local_session(boto_config):
    """Return a ``LocalSession`` backed by a boto3 session.

    The boto3 session is built from ``boto_config`` when it is truthy;
    otherwise only ``region_name=DEFAULT_REGION`` is passed.
    """
    session_kwargs = boto_config if boto_config else {"region_name": DEFAULT_REGION}
    return LocalSession(boto_session=boto3.Session(**session_kwargs))
from tensorflow.keras import datasets
import numpy as np
from sagemaker.tensorflow import TensorFlow
import os
from sagemaker.local import LocalSession

sagemaker_role = 'arn:aws:iam::70******AccountId:role/RoleNameHere'

# Module-level local session with ``local_code`` switched on.
sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}


def sagemaker_estimator(sagemaker_role, code_entry, code_dir, hyperparameters):
    """Build a local-mode ``TensorFlow`` estimator.

    Args:
        sagemaker_role: Role passed to the estimator.
        code_entry: Entry-point script passed as ``entry_point``.
        code_dir: Directory passed as ``source_dir``.
        hyperparameters: Hyperparameters passed to the estimator.

    Returns:
        The constructed ``TensorFlow`` estimator; its output path is a
        ``file://`` URI under the current working directory.
    """
    local_output = 'file://{}/model/'.format(os.getcwd())
    estimator_kwargs = dict(
        entry_point=code_entry,
        source_dir=code_dir,
        role=sagemaker_role,
        instance_type='local',
        instance_count=1,
        model_dir='/opt/ml/model',
        hyperparameters=hyperparameters,
        output_path=local_output,
        framework_version='2.2',
        py_version='py37',
        script_mode=True,
    )
    return TensorFlow(**estimator_kwargs)


def sagemaker_local_training(local_estimator, train_data_local):
    """Fit ``local_estimator`` on the 'training' channel and return it."""
    channels = {'training': train_data_local}
    local_estimator.fit(channels)
    return local_estimator
def sagemaker_local_session_no_local_code(boto_session):
    """Return a ``LocalSession`` on the given boto session with ``disable_local_code=True``."""
    session_options = {"boto_session": boto_session, "disable_local_code": True}
    return LocalSession(**session_options)
import boto3 from sagemaker import get_execution_role from sagemaker.local import LocalSession # AWS profile for this session boto_sess = boto3.Session(profile_name='<profile name>', region_name='<region name>') # sage maker local session local_session = LocalSession(boto_sess) # S3 bucket name bucket = '<bucket name>' # upload path for endpoint script. custom_code_upload_location = 's3://{}/customcode/tensorflow_iris'.format( bucket) # the path saved artifacts (outputs of training) model_artifacts_location = 's3://{}/artifacts'.format(bucket) # IAM Role # For only local mode, it's not used but need to exits. role = '<role name>' from sagemaker.tensorflow import TensorFlow # TensorFlow Estimator iris_estimator = TensorFlow(entry_point='./iris_dnn_classifier.py', role=role, framework_version='1.12.0',