def test_get_caller_identity_arn_from_an_user(boto_session):
    """An IAM user ARN reported by STS is returned unchanged by the session."""
    user_arn = 'arn:aws:iam::369233609183:user/mia'
    sess = Session(boto_session)

    # Stub both services the session may consult while resolving the ARN.
    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': user_arn}
    iam_client = sess.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': user_arn}}

    resolved = sess.get_caller_identity_arn()

    assert resolved == 'arn:aws:iam::369233609183:user/mia'
def test_get_caller_identity_arn_from_a_execution_role(boto_session):
    """An assumed SageMaker execution role is reported as its IAM service-role ARN."""
    assumed_role_arn = 'arn:aws:sts::369233609183:assumed-role/AmazonSageMaker-ExecutionRole-20171129T072388/SageMaker'
    sess = Session(boto_session)

    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': assumed_role_arn}
    iam_client = sess.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': assumed_role_arn}}

    resolved = sess.get_caller_identity_arn()

    assert resolved == 'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'
def test_get_caller_identity_arn_from_an_user_without_permissions(boto_session):
    """When ``iam.get_role`` raises ``ClientError`` the user ARN is still returned
    and exactly one warning is logged."""
    user_arn = 'arn:aws:iam::369233609183:user/mia'
    sess = Session(boto_session)

    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': user_arn}
    # Simulate a caller that is not allowed to call iam:GetRole.
    iam_client = sess.boto_session.client('iam')
    iam_client.get_role.side_effect = ClientError({}, {})

    with patch('logging.Logger.warning') as warning_mock:
        resolved = sess.get_caller_identity_arn()
        assert resolved == 'arn:aws:iam::369233609183:user/mia'
        warning_mock.assert_called_once()
def test_get_caller_identity_arn_from_a_role(boto_session):
    """An assumed-role STS ARN is resolved to the role ARN returned by IAM."""
    assumed_role_arn = 'arn:aws:sts::369233609183:assumed-role/SageMakerRole/6d009ef3-5306-49d5-8efc-78db644d8122'
    expected_role = 'arn:aws:iam::369233609183:role/SageMakerRole'
    sess = Session(boto_session)

    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': assumed_role_arn}
    iam_client = sess.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': expected_role}}

    assert sess.get_caller_identity_arn() == expected_role
def test_get_caller_identity_arn_from_role_with_path(boto_session):
    """The role ARN returned by IAM (which includes the role path) wins over the
    path-less ARN reported by STS."""
    arn_prefix = 'arn:aws:iam::369233609183:role'
    role_name = 'name'
    role_path = 'path'
    role_without_path = '{}/{}'.format(arn_prefix, role_name)
    role_with_path = '{}/{}/{}'.format(arn_prefix, role_path, role_name)

    sess = Session(boto_session)
    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': role_without_path}
    iam_client = sess.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': role_with_path}}

    assert sess.get_caller_identity_arn() == role_with_path
def test_user_agent_injected_with_nbi(boto_session):
    """With a readable notebook-instance version file, both clients get the SDK
    and the notebook-instance markers in their user agent."""
    sdk_marker = "AWS-SageMaker-Python-SDK"
    nbi_marker = "AWS-SageMaker-Notebook-Instance"
    version_file = "/etc/opt/ml/sagemaker-notebook-instance-version.txt"

    # Precondition: the raw boto client has not been decorated yet.
    assert sdk_marker not in boto_session.client("sagemaker")._client_config.user_agent

    with patch("six.moves.builtins.open", mock_open(read_data="120.0-0")) as mo:
        sess = Session(boto_session)
        mo.assert_called_with(version_file)

    # Same order as the explicit checks: SDK marker on both clients, then NBI marker.
    for marker in (sdk_marker, nbi_marker):
        assert marker in sess.sagemaker_client._client_config.user_agent
        assert marker in sess.sagemaker_runtime_client._client_config.user_agent
def test_user_agent_injected_with_nbi_ioerror(boto_session):
    """If reading the notebook-instance version file raises ``IOError``, only the
    SDK marker is injected into the clients' user agent."""
    sdk_marker = "AWS-SageMaker-Python-SDK"
    nbi_marker = "AWS-SageMaker-Notebook-Instance"
    version_file = "/etc/opt/ml/sagemaker-notebook-instance-version.txt"

    # Precondition: the raw boto client has not been decorated yet.
    assert sdk_marker not in boto_session.client("sagemaker")._client_config.user_agent

    failing_open = MagicMock(side_effect=IOError("File not found"))
    with patch("six.moves.builtins.open", failing_open) as mo:
        sess = Session(boto_session)
        mo.assert_called_with(version_file)

    assert sdk_marker in sess.sagemaker_client._client_config.user_agent
    assert sdk_marker in sess.sagemaker_runtime_client._client_config.user_agent
    assert nbi_marker not in sess.sagemaker_client._client_config.user_agent
    assert nbi_marker not in sess.sagemaker_runtime_client._client_config.user_agent
def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config, boto_session):
    """Build a ``Session`` on top of ``boto_session`` with optionally configured clients.

    A retry ``Config`` (``max_attempts=10``) is inserted into
    ``sagemaker_client_config`` under ``"config"`` when that key is absent; the
    passed-in dict is mutated. A client is only created when its config dict is
    non-empty; otherwise ``None`` is handed to ``Session``.
    """
    sagemaker_client_config.setdefault("config", Config(retries=dict(max_attempts=10)))

    if sagemaker_client_config:
        sagemaker_client = boto_session.client("sagemaker", **sagemaker_client_config)
    else:
        sagemaker_client = None

    if sagemaker_runtime_config:
        runtime_client = boto_session.client("sagemaker-runtime", **sagemaker_runtime_config)
    else:
        runtime_client = None

    return Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
    )
def __init__(self, model_data, role=None, spark_version=2.4, sagemaker_session=None, **kwargs):
    """Initialize a SparkMLModel.

    Args:
        model_data (str): The S3 location of a SageMaker model data
            ``.tar.gz`` file. For SparkML, this is the output produced by
            the Spark job after serializing the Model via MLeap.
        role (str): An AWS IAM role (either name or full ARN). The Amazon
            SageMaker training jobs and APIs that create Amazon SageMaker
            endpoints use this role to access training data and model
            artifacts. After the endpoint is created, the inference code
            might use the IAM role, if it needs to access an AWS resource.
        spark_version (str): Spark version you want to use for executing
            the inference (default: '2.4').
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, a default ``Session`` is
            created only to look up the region. For local mode, please do
            not pass this variable.
        **kwargs: Additional parameters passed to the
            :class:`~sagemaker.model.Model` constructor.

    .. tip::

        You can find additional parameters for initializing this class at
        :class:`~sagemaker.model.Model`.
    """
    # NOTE(review): the default is the float ``2.4`` while the argument is
    # documented as a string -- confirm ``image_uris.retrieve`` accepts both.

    # For local mode, sagemaker_session should be passed as None, but a session
    # is still needed to discover the boto region name. The temporary session is
    # deliberately NOT forwarded to the parent constructor.
    region_session = sagemaker_session or Session()
    region_name = region_session.boto_region_name
    serving_image = image_uris.retrieve(framework_name, region_name, version=spark_version)

    super(SparkMLModel, self).__init__(
        serving_image,
        model_data,
        role,
        predictor_cls=SparkMLPredictor,
        sagemaker_session=sagemaker_session,
        **kwargs,
    )
def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config, boto_config):
    """Build a ``Session`` from boto and client configuration dicts.

    ``boto_config`` (when non-empty) is expanded into ``boto3.Session``;
    otherwise a boto session for ``DEFAULT_REGION`` is used. A retry
    ``Config`` (``max_attempts=10``) is inserted into
    ``sagemaker_client_config`` under ``'config'`` when that key is absent;
    the passed-in dict is mutated. A client is only created when its config
    dict is non-empty.
    """
    if boto_config:
        boto_session = boto3.Session(**boto_config)
    else:
        boto_session = boto3.Session(region_name=DEFAULT_REGION)

    sagemaker_client_config.setdefault('config', Config(retries=dict(max_attempts=10)))

    sagemaker_client = None
    if sagemaker_client_config:
        sagemaker_client = boto_session.client('sagemaker', **sagemaker_client_config)

    runtime_client = None
    if sagemaker_runtime_config:
        runtime_client = boto_session.client('sagemaker-runtime', **sagemaker_runtime_config)

    return Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
    )
def __init__(self, endpoint, sagemaker_session=None):
    """Initialize a SparkMLPredictor.

    The predictor is meant to be used with SparkMLModel to perform
    predictions against SparkML models serialized via MLeap. Requests are
    serialized with ``csv_serializer`` and sent with ``CONTENT_TYPE_CSV``;
    text/csv is the default response format of the SparkML Serving container.

    Args:
        endpoint (str): The name of the endpoint to perform inference on.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, one is created using the
            default AWS configuration chain.
    """
    if not sagemaker_session:
        sagemaker_session = Session()

    super(SparkMLPredictor, self).__init__(
        endpoint=endpoint,
        sagemaker_session=sagemaker_session,
        serializer=csv_serializer,
        content_type=CONTENT_TYPE_CSV,
    )
def sagemaker_session_with_custom_bucket(boto_session, sagemaker_client_config,
                                         sagemaker_runtime_config, custom_bucket_name):
    """Build a ``Session`` whose default bucket is ``custom_bucket_name``.

    A retry ``Config`` (``max_attempts=10``) is inserted into
    ``sagemaker_client_config`` under ``"config"`` when that key is absent; the
    passed-in dict is mutated. A client is only created when its config dict is
    non-empty.
    """
    sagemaker_client_config.setdefault("config", Config(retries=dict(max_attempts=10)))

    if sagemaker_client_config:
        sagemaker_client = boto_session.client("sagemaker", **sagemaker_client_config)
    else:
        sagemaker_client = None

    if sagemaker_runtime_config:
        runtime_client = boto_session.client("sagemaker-runtime", **sagemaker_runtime_config)
    else:
        runtime_client = None

    return Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
        default_bucket=custom_bucket_name,
    )
def test_sagemaker_session_does_not_create_bucket_on_init(
        sagemaker_client_config, sagemaker_runtime_config, boto_session):
    """Constructing a ``Session`` with ``default_bucket`` must not create that bucket."""
    sagemaker_client_config.setdefault("config", Config(retries=dict(max_attempts=10)))

    sagemaker_client = None
    if sagemaker_client_config:
        sagemaker_client = boto_session.client("sagemaker", **sagemaker_client_config)

    runtime_client = None
    if sagemaker_runtime_config:
        runtime_client = boto_session.client("sagemaker-runtime", **sagemaker_runtime_config)

    # Only the constructor's side effects are under test; the instance is discarded.
    Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
        default_bucket=CUSTOM_BUCKET_NAME,
    )

    s3_resource = boto3.resource("s3", region_name=boto_session.region_name)
    bucket = s3_resource.Bucket(CUSTOM_BUCKET_NAME)
    assert bucket.creation_date is None
def upload(src, dst, gz, session: sagemaker.Session, root='.'):
    """Upload a local file or directory to S3.

    Args:
        src: Local path of an existing file or directory.
        dst: Destination. Either a full ``s3://bucket/key`` URI or a key
            relative to the session's default bucket (one leading ``/`` is
            ignored). The value is first passed through ``cli_argument``.
        gz: When truthy, ``src`` must be a directory; it is packed into a
            gzip-compressed tarball which is uploaded to ``dst``.
        session: SageMaker session providing the boto session and the
            default bucket.
        root: Archive name given to ``src`` inside the tarball (``gz`` only).

    Raises:
        click.UsageError: If ``src`` does not exist, is neither a file nor a
            directory, ``gz`` is requested for a single file, or a ``gz``
            destination does not end in ``.tar.gz`` / ``.tgz``.
    """
    dst = cli_argument(dst, session=session)
    if not os.path.exists(src):
        raise click.UsageError("Source must exist")

    if not dst.startswith('s3://'):
        # Relative destinations are resolved against the default bucket.
        if dst.startswith('/'):
            dst = dst[1:]
        bucket = session.default_bucket()
        dst = 's3://{}/{}'.format(bucket, dst)

    url = urlparse(dst)
    # Internal invariant: dst always starts with 's3://' at this point.
    assert url.scheme == 's3'
    bucket = url.netloc
    key = url.path
    if key.startswith('/'):
        key = key[1:]

    if os.path.isfile(src):
        if gz:
            raise click.UsageError(
                "Option gz is only valid for source directories")
        s3 = session.boto_session.client('s3')
        s3.upload_file(src, bucket, key)
    elif os.path.isdir(src):
        if gz:
            # The previous pattern "(tar\.gz||tgz)" contained an empty
            # alternative, so any destination ending in "." was accepted.
            if not re.match(r".*\.(tar\.gz|tgz)$", dst, re.IGNORECASE):
                raise click.UsageError(
                    "Destination should end in .tar.gz or tgz")
            file_name = os.path.basename(dst)
            with _tmpdir() as tmp:
                archive_path = os.path.join(tmp, file_name)
                with tarfile.open(archive_path, 'w:gz') as arc:
                    arc.add(name=src, arcname=root, recursive=True)
                # Upload only after the archive has been closed (fully written)
                # and before the temporary directory is cleaned up.
                s3 = session.boto_session.client('s3')
                s3.upload_file(archive_path, bucket, key)
        else:
            S3Uploader.upload(local_path=src,
                              desired_s3_uri=dst,
                              sagemaker_session=session)
    else:
        raise click.UsageError("Source must be file or directory")
def create_clarify_bias_job(event):
    """Start a SageMaker Clarify bias analysis job described by ``event``.

    All settings are read from ``event["Input"]["Payload"]``, using the
    sections ``security-config``, ``workspace-config``, ``data-config``,
    ``model-config``, ``automl-config`` and ``bias-analysis-config``.
    The job is started with ``wait=False`` and ``logs=False``; nothing is
    returned.
    """
    payload = event["Input"]["Payload"]
    role = payload["security-config"]["iam_role"]
    ws_params = payload["workspace-config"]
    # Not used below; the lookup is kept so a missing section still raises KeyError.
    _data_params = payload["data-config"]
    model_params = payload["model-config"]
    automl_params = payload["automl-config"]
    bias_analysis_params = payload["bias-analysis-config"]

    def workspace_uri(suffix):
        # s3://<bucket>/<prefix>/<suffix> inside the configured workspace.
        return "s3://{}/{}/{}".format(
            ws_params["s3_bucket"], ws_params["s3_prefix"], suffix)

    # This is a temporary workaround. The bias detection job behaves
    # differently when files are split. Remove when the bug is fixed.
    input_path = create_merged_dataset(
        automl_params["data_uri"],
        workspace_uri("data/merged"),
        automl_params["target_name"],
    )
    # End of workaround.

    clarify_processor = clarify.SageMakerClarifyProcessor(
        role=role,
        instance_count=bias_analysis_params["instance_count"],
        instance_type=bias_analysis_params["instance_type"],
        sagemaker_session=Session(),
    )

    bias_data_config = clarify.DataConfig(
        s3_data_input_path=input_path,
        s3_output_path=workspace_uri(bias_analysis_params["output_prefix"]),
        label=automl_params["target_name"],
        headers=get_data_columns(automl_params["data_uri"]),
        dataset_type='text/csv',
    )

    model_config = clarify.ModelConfig(
        model_name=model_params["model_name"],
        instance_type=model_params["instance_type"],
        instance_count=model_params["instance_count"],
        accept_type='text/csv',
        content_type='text/csv',
    )

    # In the next two configs the keyword names equal the keys of the
    # corresponding payload section, so they are forwarded by name.
    pred_params = bias_analysis_params["prediction-config"]
    prediction_keys = ("label", "probability", "probability_threshold", "label_headers")
    predictions_config = clarify.ModelPredictedLabelConfig(
        **{key: pred_params[key] for key in prediction_keys})

    bias_params = bias_analysis_params["bias-config"]
    bias_keys = ("label_values_or_threshold", "facet_name",
                 "facet_values_or_threshold", "group_name")
    bias_config = clarify.BiasConfig(
        **{key: bias_params[key] for key in bias_keys})

    clarify_processor.run_bias(
        job_name=bias_analysis_params["job_name"],
        data_config=bias_data_config,
        bias_config=bias_config,
        model_config=model_config,
        model_predicted_label_config=predictions_config,
        pre_training_methods='all',
        post_training_methods='all',
        wait=False,
        logs=False,
    )
def test_get_caller_identity_arn_from_an_user(boto_session):
    """An IAM user ARN reported by STS is returned unchanged by the session."""
    user_arn = 'arn:aws:iam::369233609183:user/mia'
    sess = Session(boto_session)
    sts_client = sess.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': user_arn}

    resolved = sess.get_caller_identity_arn()

    assert resolved == 'arn:aws:iam::369233609183:user/mia'
def test_delete_endpoint(boto_session):
    """``Session.delete_endpoint`` forwards the name as ``EndpointName``."""
    endpoint_name = 'my_endpoint'
    sess = Session(boto_session)

    sess.delete_endpoint(endpoint_name)

    mocked_client = boto_session.client()
    mocked_client.delete_endpoint.assert_called_with(EndpointName='my_endpoint')
def _status(self):
    """Return the result of applying ``describe_processing_job`` (from a fresh
    default ``Session``) via ``self._process_latest``."""
    describe = Session().describe_processing_job
    return self._process_latest(describe)
def _delete(self):
    """Apply ``stop_processing_job`` via ``self._process_latest``, then return the
    result of applying ``describe_processing_job`` the same way.

    A fresh default ``Session`` is created for each of the two calls.
    """
    stop = Session().stop_processing_job
    self._process_latest(stop)

    describe = Session().describe_processing_job
    return self._process_latest(describe)
from __future__ import absolute_import import itertools import json import os import shutil import subprocess import click import pandas as pd from sagemaker import Session from sagemaker.tensorflow import TensorFlow dir_path = os.path.dirname(os.path.realpath(__file__)) benchmark_results_dir = os.path.join("s3://", Session().default_bucket(), "hvd-benchmarking") @click.group() def cli(): pass def generate_report(): results_dir = os.path.join(dir_path, "results") if os.path.exists(results_dir): shutil.rmtree(results_dir) subprocess.call(
# or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import absolute_import import argparse import itertools import os from sagemaker import Session from sagemaker.estimator import Framework from sagemaker.tensorflow import TensorFlow default_bucket = Session().default_bucket dir_path = os.path.dirname(os.path.realpath(__file__)) _DEFAULT_HYPERPARAMETERS = { 'batch_size': 32, 'model': 'resnet32', 'num_epochs': 10, 'data_format': 'NHWC', 'summary_verbosity': 1, 'save_summaries_steps': 10, 'data_name': 'cifar10' } class ScriptModeTensorFlow(Framework): """This class is temporary until the final version of Script Mode is released.
def __init__(
    self,
    entry_point="train.py",
    source_dir=None,
    hyperparameters=None,
    py_version="py3",
    framework_version={"transformers": "4.1.1", "datasets": "1.1.3"},
    image_name=None,
    distributions=None,
    **kwargs,
):
    """Initialize a ``HuggingFace`` estimator.

    Args:
        entry_point (str): Training entry-point script, forwarded to the
            parent constructor (default: "train.py").
        source_dir (str): Source directory, forwarded to the parent
            constructor (default: None).
        hyperparameters (dict): Training hyperparameters, forwarded to the
            parent constructor (default: None).
        py_version (str): Python version, validated together with
            ``framework_version`` (default: "py3").
        framework_version (dict): Versions of ``transformers`` and
            ``datasets`` to use.
        image_name: Accepted but not used by this constructor.
        distributions: Accepted but not used by this constructor.
        **kwargs: Forwarded to the parent constructor. ``instance_type``
            (default "local") and ``sagemaker_session`` are additionally
            stored on the instance.
    """
    # The signature default is one dict object shared by every call. Store a
    # per-instance copy so mutating ``self.framework_version`` on one
    # estimator cannot leak into later instances (or into the caller's dict).
    if isinstance(framework_version, dict):
        framework_version = dict(framework_version)

    # validating framework_version and python version
    self.framework_version = framework_version
    self.py_version = py_version
    validate_version_or_image_args(self.framework_version, self.py_version)

    # checking for instance_type
    self.instance_type = kwargs.get("instance_type", "local")

    # build ecr_uri
    self.image_uri = self._get_container_image("training")

    # Use the caller's session when one was passed (even if it is None);
    # only build a default Session when the key is absent.
    if "sagemaker_session" in kwargs:
        self.sagemaker_session = kwargs["sagemaker_session"]
    else:
        self.sagemaker_session = Session()

    super(HuggingFace, self).__init__(
        entry_point, source_dir, hyperparameters, image_uri=self.image_uri, **kwargs
    )
def n_virginia_sagemaker_session(n_virginia_region):
    """Return a ``Session`` backed by a boto3 session for ``n_virginia_region``."""
    regional_boto_session = boto3.Session(region_name=n_virginia_region)
    return Session(boto_session=regional_boto_session)
def sagemaker_session():
    """Return a ``Session`` backed by a boto3 session for ``REGION``."""
    regional_boto_session = boto3.Session(region_name=REGION)
    return Session(boto_session=regional_boto_session)
for array in np.array_split(dataset[:, :-1], 100): result = model.predict(array) predictions += [r['predicted_label'] for r in result['predictions']] predictions = np.array(predictions) return deo_from_list(dataset, predictions, groupA_idxs, groupB_idxs) # ## Prerequisites and Data <a class="anchor" id="pre_and_data"> # ### Initialize SageMaker <a class="anchor" id="initsagemaker"> # In[5]: from sagemaker import Session bucket = Session().default_bucket() #'fairness-test2' prefix = 'sagemaker/DEMO-linear-adult' # Define IAM role from sagemaker import get_execution_role import pandas as pd import numpy as np import urllib import os import sklearn.preprocessing as preprocessing import seaborn as sns role = get_execution_role() # ### Download data <a class="anchor" id="download_data"> # Data Source: [https://archive.ics.uci.edu/ml/machine-learning-databases/adult/](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/)
def model_create(job, model_artifact, name, session: sagemaker.Session,
                 inference_image, inference_image_path,
                 inference_image_accounts, role, force, multimodel=False,
                 accelerator_type=None):
    """Create a SageMaker model from a training job or from a model artifact.

    Args:
        job: Training job name whose ``ModelArtifacts.S3ModelArtifacts`` is
            used. Mutually exclusive with ``model_artifact``.
        model_artifact: Model artifact location. Values not starting with
            ``s3://`` are resolved against the session's default bucket.
        name: Model name. Required with ``model_artifact``; defaults to
            ``job`` otherwise.
        session: SageMaker session used for all AWS calls.
        inference_image: Tag passed to ``Image``.
        inference_image_path: Path passed to ``Image``.
        inference_image_accounts: Accounts passed to ``Image``.
        role: Role name passed to ``ensure_inference_role``.
        force: When truthy, an existing model of the same name is deleted
            first; otherwise an existing model is a usage error.
        multimodel: Selects container mode ``'MultiModel'`` instead of
            ``'SingleModel'``.
        accelerator_type: Accepted but not used by this function.

    Raises:
        click.UsageError: If not exactly one of ``job``/``model_artifact`` is
            given, if ``name`` is missing for an artifact, or if the model
            exists and ``force`` is falsy.
    """
    job = cli_argument(job, session=session)
    name = cli_argument(name, session=session)
    model_artifact = cli_argument(model_artifact, session=session)

    # Validate the arguments before touching ECR/IAM so that a usage error
    # never leaves side effects behind.
    if bool(job) == bool(model_artifact):
        raise click.UsageError('Specify one of job_name or model_artifact')
    if model_artifact and not name:
        raise click.UsageError('name is required if job is not provided')

    image_config = Image(tag=inference_image,
                         path=inference_image_path,
                         accounts=inference_image_accounts)
    image_uri = ecr_ensure_image(image=image_config,
                                 session=session.boto_session)

    iam = session.boto_session.client('iam')
    client = session.boto_session.client('sagemaker')
    role = ensure_inference_role(iam=iam, role_name=role)

    if job:
        model_artifact = training_describe(
            job_name=job,
            field='ModelArtifacts.S3ModelArtifacts',
            session=session)
        if not name:
            name = job
        print("Creating model [{}] from job [{}] artifact [{}]".format(
            name, job, model_artifact))
    else:
        if not model_artifact.startswith('s3://'):
            # Relative artifacts live in the session's default bucket.
            if model_artifact.startswith('/'):
                model_artifact = model_artifact[1:]
            bucket = session.default_bucket()
            model_artifact = 's3://{}/{}'.format(bucket, model_artifact)
        print("Creating model [{}] from artifact [{}]".format(
            name, model_artifact))

    if model_exists(name=name, client=client):
        if force:
            print("Deleting existing model")
            model_delete(name=name, client=client)
        else:
            raise click.UsageError('Specify force if overwriting model')

    model = sagemaker.Model(
        image_uri=image_uri,
        model_data=model_artifact,
        role=role,
        predictor_cls=None,
        env=None,
        name=name,
        sagemaker_session=session,
    )
    container_def = sagemaker.container_def(
        model.image_uri,
        model.model_data,
        model.env,
        container_mode='MultiModel' if multimodel else 'SingleModel')

    enable_network_isolation = model.enable_network_isolation()
    session.create_model(
        model.name,
        model.role,
        container_def,
        vpc_config=model.vpc_config,
        enable_network_isolation=enable_network_isolation,
    )
def sagemaker_session():
    """Return a ``Session`` built with default arguments."""
    return Session()
def test_delete_endpoint_config(boto_session):
    """``Session.delete_endpoint_config`` forwards the name as ``EndpointConfigName``."""
    config_name = 'my_endpoint_config'
    sess = Session(boto_session)

    sess.delete_endpoint_config(config_name)

    mocked_client = boto_session.client()
    mocked_client.delete_endpoint_config.assert_called_with(
        EndpointConfigName='my_endpoint_config')
def test_delete_endpoint(boto_session):
    """``Session.delete_endpoint`` forwards the name as ``EndpointName``."""
    endpoint_name = 'my_endpoint'
    sess = Session(boto_session)

    sess.delete_endpoint(endpoint_name)

    mocked_client = boto_session.client()
    mocked_client.delete_endpoint.assert_called_with(EndpointName='my_endpoint')
def __init__(
    self,
    entry_point="train.py",
    source_dir: str = None,
    hyperparameters: Optional[Dict[str, str]] = None,
    framework_version={"transformers": "4.1.1", "datasets": "1.1.3"},
    image_uri: Optional[str] = None,
    huggingface_token: str = None,
    **kwargs,
):
    """Initialize a ``HuggingFace`` estimator.

    Args:
        entry_point (str): Path (absolute or relative) to the Python source
            file which should be executed as the entry point to training.
            If ``source_dir`` is specified, then ``entry_point`` must point
            to a file located at the root of ``source_dir``.
        source_dir (str): Path (absolute, relative or an S3 URI) to a
            directory with any other training source code dependencies
            aside from the entry point file (default: None). If
            ``source_dir`` is an S3 URI, it must point to a tar.gz file.
            Structure within this directory is preserved when training on
            Amazon SageMaker.
        hyperparameters (dict): Hyperparameters that will be used for
            training (default: None). The hyperparameters are made
            accessible as a dict[str, str] to the training code on
            SageMaker. For convenience, this accepts other types for keys
            and values, but ``str()`` will be called to convert them before
            training.
        framework_version (dict): Transformers and datasets versions you
            want to use for executing your model training code. Defaults to
            ``{"transformers": "4.1.1", "datasets": "1.1.3"}``. Required
            unless ``image_uri`` is provided. List of supported versions:
            https://github.com/aws/sagemaker-python-sdk#pytorch-sagemaker-estimators.
        image_uri (str): If specified, the estimator will use this image
            for training and hosting, instead of selecting the appropriate
            SageMaker official image based on framework_version and
            py_version. It can be an ECR url or dockerhub image and tag.

            Examples:
                * ``123412341234.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0``
                * ``custom-image:latest``

            If ``framework_version`` or ``py_version`` are ``None``, then
            ``image_uri`` is required. If also ``None``, then a
            ``ValueError`` will be raised.
        huggingface_token (str): HuggingFace Hub authentication token for
            uploading your model files. You can get this by either using
            the [transformers-cli](https://huggingface.co/transformers/model_sharing.html)
            with `transformers-cli login` or using the `login()` method of
            `transformers.hf_api`. If the HuggingFace token is provided the
            model will be uploaded automatically to the model hub using the
            `base_job_name` as repository name.
        **kwargs: Forwarded to the parent constructor. ``instance_type``
            (default "local") and ``sagemaker_session`` are additionally
            stored on the instance.
    """
    # The signature default is one dict object shared by every call. Store a
    # per-instance copy so mutating ``self.framework_version`` on one
    # estimator cannot leak into later instances (or into the caller's dict).
    if isinstance(framework_version, dict):
        framework_version = dict(framework_version)

    # validating framework_version and python version
    self.framework_version = framework_version
    self.py_version = "py3"
    validate_version_or_image_args(self.framework_version, self.py_version)

    # checking for instance_type
    self.instance_type = kwargs.get("instance_type", "local")

    # Honor an explicitly supplied image, as documented; previously the
    # argument was silently discarded in favor of the derived training image.
    if image_uri:
        self.image_uri = image_uri
    else:
        self.image_uri = self._get_container_image("training")

    # Use the caller's session when one was passed (even if it is None);
    # only build a default Session when the key is absent.
    if "sagemaker_session" in kwargs:
        self.sagemaker_session = kwargs["sagemaker_session"]
    else:
        self.sagemaker_session = Session()

    super(HuggingFace, self).__init__(
        entry_point, source_dir, hyperparameters, image_uri=self.image_uri, **kwargs
    )

    if huggingface_token:
        logger.info(
            f"estimator initialized with HuggingFace Token, model will be uploaded to hub using the {self.base_job_name} as repostiory name"
        )
    self.huggingface_token = huggingface_token
# Once this is complete, click on **Update Trust Policy** and you are done. # ## 1.2 Setup S3 bucket # # First, we need to setup an S3 bucket within your account, and upload the necessary files to this bucket. To setup the bucket, we will run the first code block, labeled Setup S3 bucket. To run the cell while the code cell is selected, you can either press Shift and Return at the same time or select the Run button at the top of the Jupyter notebook. # In[2]: import boto3 import botocore from botocore.exceptions import ClientError from sagemaker import Session as Sess # SageMaker session sess = Sess() # Boto3 session session = boto3.session.Session() s3 = session.resource('s3') account = session.client('sts').get_caller_identity()['Account'] region = session.region_name bucket_name = 'sagemaker-glue-process-{}-{}'.format("test", region) try: if region == 'us-east-1': s3.create_bucket(Bucket=bucket_name) else: s3.create_bucket( Bucket=bucket_name,
def _logs(self):
    """Return the result of applying ``logs_for_job`` (from a fresh default
    ``Session``) via ``self._process_latest``."""
    fetch_logs = Session().logs_for_job
    return self._process_latest(fetch_logs)
def __init__(
    self,
    framework_version,
    role,
    instance_type,
    instance_count,
    command=None,
    volume_size_in_gb=30,
    volume_kms_key=None,
    output_kms_key=None,
    max_runtime_in_seconds=None,
    base_job_name=None,
    sagemaker_session=None,
    env=None,
    tags=None,
    network_config=None,
):
    """Initialize an ``SKLearnProcessor`` instance.

    The SKLearnProcessor handles Amazon SageMaker processing tasks for jobs
    using scikit-learn. The container image is looked up from the
    scikit-learn version, the session's region and the instance type.

    Args:
        framework_version (str): The version of scikit-learn.
        role (str): An AWS IAM role name or ARN. The Amazon SageMaker
            training jobs and APIs that create Amazon SageMaker endpoints
            use this role to access training data and model artifacts.
            After the endpoint is created, the inference code might use the
            IAM role, if it needs to access an AWS resource.
        instance_type (str): Type of EC2 instance to use for processing,
            for example, 'ml.c4.xlarge'.
        instance_count (int): The number of instances to run the Processing
            job with.
        command ([str]): The command to run, along with any command-line
            flags. Example: ["python3", "-v"]. If not provided,
            ["python3"] is used.
        volume_size_in_gb (int): Size in GB of the EBS volume to use for
            storing data during processing (default: 30).
        volume_kms_key (str): A KMS key for the processing volume.
        output_kms_key (str): The KMS key id for all ProcessingOutputs.
        max_runtime_in_seconds (int): Timeout in seconds. After this amount
            of time Amazon SageMaker terminates the job regardless of its
            current status.
        base_job_name (str): Prefix for processing name. If not specified,
            the processor generates a default job name, based on the
            training image name and current timestamp.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the processor creates
            one using the default AWS configuration chain.
        env (dict): Environment variables to be passed to the processing
            job.
        tags ([dict]): List of tags to be passed to the processing job.
        network_config (sagemaker.network.NetworkConfig): A NetworkConfig
            object that configures network isolation, encryption of
            inter-container traffic, security group IDs, and subnets.
    """
    command = command or ["python3"]

    if sagemaker_session:
        session = sagemaker_session
    else:
        session = Session()

    # The image is region specific, so the (possibly freshly created) session
    # is consulted for the region and then passed on to the parent class.
    processing_image = image_uris.retrieve(
        defaults.SKLEARN_NAME,
        session.boto_region_name,
        version=framework_version,
        instance_type=instance_type,
    )

    super(SKLearnProcessor, self).__init__(
        role=role,
        image_uri=processing_image,
        instance_count=instance_count,
        instance_type=instance_type,
        command=command,
        volume_size_in_gb=volume_size_in_gb,
        volume_kms_key=volume_kms_key,
        output_kms_key=output_kms_key,
        max_runtime_in_seconds=max_runtime_in_seconds,
        base_job_name=base_job_name,
        sagemaker_session=session,
        env=env,
        tags=tags,
        network_config=network_config,
    )
from json import dumps from scipy.sparse import lil_matrix from sagemaker import Session from sagemaker.predictor import RealTimePredictor, json_deserializer sm_sess = Session() def build_spare_matrix_payload(movie_id): # 943 - nb_users_train x_movie_id = 943 + int(movie_id) # 2625 - nb_features X_new = lil_matrix((1, 2625)).astype('float32') X_new[0, 944] = 1 X_new[0, x_movie_id] = 1 payload = X_new[0].toarray() return (payload) def post_process(result): score = result["predictions"][0]["score"] pp_score = round(score * 100, 2) predicted_label = result["predictions"][0]["predicted_label"] if predicted_label == 1: txt = "Chance that you will like this: " + str(pp_score) else: txt = "Chance that you will like this: " + str(round(
def sagemaker_session(region):
    """Return a ``Session`` backed by a boto3 session for ``region``."""
    regional_boto_session = boto3.Session(region_name=region)
    return Session(boto_session=regional_boto_session)
from __future__ import absolute_import

import itertools
import json
import os
import shutil
import subprocess

import click
import pandas as pd
from sagemaker import Session
from sagemaker.tensorflow import TensorFlow

# Directory containing this script; local results are stored beneath it.
dir_path = os.path.dirname(os.path.realpath(__file__))
# NOTE: evaluated at import time -- creating the Session and asking for its
# default bucket happens as soon as this module is imported.
benchmark_results_dir = os.path.join('s3://', Session().default_bucket(), 'hvd-benchmarking')


# Root click command group; it performs no work itself.
@click.group()
def cli():
    pass


def generate_report():
    """Replace the local ``results`` directory with a recursive copy of
    ``benchmark_results_dir`` made with the ``aws s3 cp`` command line tool.
    """
    results_dir = os.path.join(dir_path, 'results')

    # Start from a clean directory so stale results are not kept.
    if os.path.exists(results_dir):
        shutil.rmtree(results_dir)

    # The exit status of the AWS CLI is not checked.
    subprocess.call(['aws', 's3', 'cp', '--recursive', benchmark_results_dir, results_dir])