def test_get_caller_identity_arn_from_an_user(boto_session):
    """The caller identity ARN of an IAM user is reported as-is."""
    session = Session(boto_session)
    user_arn = 'arn:aws:iam::369233609183:user/mia'

    # Both the STS and IAM clients are stubbed with the same ARN.
    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': user_arn}
    iam_client = session.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': user_arn}}

    resolved = session.get_caller_identity_arn()
    assert resolved == 'arn:aws:iam::369233609183:user/mia'
def test_get_caller_identity_arn_from_a_execution_role(boto_session):
    """An assumed SageMaker execution role resolves to its ``role/service-role`` ARN."""
    session = Session(boto_session)
    assumed_role_arn = (
        'arn:aws:sts::369233609183:assumed-role/AmazonSageMaker-ExecutionRole-20171129T072388/SageMaker'
    )

    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': assumed_role_arn}
    iam_client = session.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': assumed_role_arn}}

    resolved = session.get_caller_identity_arn()
    assert resolved == 'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'
def test_get_caller_identity_arn_from_an_user_without_permissions(boto_session):
    """When ``iam.get_role`` raises ``ClientError`` the STS ARN is returned and one warning is logged."""
    session = Session(boto_session)
    user_arn = 'arn:aws:iam::369233609183:user/mia'

    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': user_arn}
    # Simulate the IAM lookup failing.
    iam_client = session.boto_session.client('iam')
    iam_client.get_role.side_effect = ClientError({}, {})

    with patch('logging.Logger.warning') as warning_mock:
        resolved = session.get_caller_identity_arn()
        assert resolved == 'arn:aws:iam::369233609183:user/mia'
        warning_mock.assert_called_once()
def test_get_caller_identity_arn_from_a_role(boto_session):
    """An assumed-role STS ARN resolves to the role ARN returned by ``iam.get_role``."""
    session = Session(boto_session)
    assumed_role_arn = (
        'arn:aws:sts::369233609183:assumed-role/SageMakerRole/6d009ef3-5306-49d5-8efc-78db644d8122'
    )
    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': assumed_role_arn}

    expected_role = 'arn:aws:iam::369233609183:role/SageMakerRole'
    iam_client = session.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': expected_role}}

    resolved = session.get_caller_identity_arn()
    assert resolved == expected_role
def test_get_caller_identity_arn_from_role_with_path(boto_session):
    """A role ARN without a path resolves to the path-qualified ARN reported by IAM."""
    session = Session(boto_session)
    arn_prefix = 'arn:aws:iam::369233609183:role'
    role_name = 'name'

    # STS reports the role without its path component.
    bare_role_arn = '/'.join((arn_prefix, role_name))
    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': bare_role_arn}

    # IAM reports the same role including its path.
    role_path = 'path'
    role_with_path = '/'.join((arn_prefix, role_path, role_name))
    iam_client = session.boto_session.client('iam')
    iam_client.get_role.return_value = {'Role': {'Arn': role_with_path}}

    resolved = session.get_caller_identity_arn()
    assert resolved == role_with_path
Exemple #6
0
def test_user_agent_injected_with_nbi(boto_session):
    """SDK and notebook-instance markers are added to both clients' user agents.

    The notebook-instance version file is simulated by patching ``open``.
    """

    def agent_of(client):
        # User-agent string configured on a boto client.
        return client._client_config.user_agent

    assert "AWS-SageMaker-Python-SDK" not in agent_of(boto_session.client("sagemaker"))

    version_file = mock_open(read_data="120.0-0")
    with patch("six.moves.builtins.open", version_file) as mo:
        sess = Session(boto_session)

        mo.assert_called_with("/etc/opt/ml/sagemaker-notebook-instance-version.txt")

    assert "AWS-SageMaker-Python-SDK" in agent_of(sess.sagemaker_client)
    assert "AWS-SageMaker-Python-SDK" in agent_of(sess.sagemaker_runtime_client)
    assert "AWS-SageMaker-Notebook-Instance" in agent_of(sess.sagemaker_client)
    assert "AWS-SageMaker-Notebook-Instance" in agent_of(sess.sagemaker_runtime_client)
Exemple #7
0
def test_user_agent_injected_with_nbi_ioerror(boto_session):
    """If reading the notebook-instance version file raises ``IOError``, only the SDK marker is added."""

    def agent_of(client):
        # User-agent string configured on a boto client.
        return client._client_config.user_agent

    assert "AWS-SageMaker-Python-SDK" not in agent_of(boto_session.client("sagemaker"))

    failing_open = MagicMock(side_effect=IOError("File not found"))
    with patch("six.moves.builtins.open", failing_open) as mo:
        sess = Session(boto_session)

        mo.assert_called_with("/etc/opt/ml/sagemaker-notebook-instance-version.txt")

    assert "AWS-SageMaker-Python-SDK" in agent_of(sess.sagemaker_client)
    assert "AWS-SageMaker-Python-SDK" in agent_of(sess.sagemaker_runtime_client)
    assert "AWS-SageMaker-Notebook-Instance" not in agent_of(sess.sagemaker_client)
    assert "AWS-SageMaker-Notebook-Instance" not in agent_of(sess.sagemaker_runtime_client)
def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config,
                      boto_session):
    """Build a ``Session`` around clients created from the given configs.

    ``sagemaker_client_config`` is mutated: a ``"config"`` entry with
    ``max_attempts=10`` retries is added when the caller did not supply one.
    Each client is created only when its config mapping is truthy, otherwise
    ``None`` is passed to ``Session``.
    """
    sagemaker_client_config.setdefault(
        "config", Config(retries={"max_attempts": 10}))

    if sagemaker_client_config:
        sm_client = boto_session.client("sagemaker", **sagemaker_client_config)
    else:
        sm_client = None

    if sagemaker_runtime_config:
        sm_runtime_client = boto_session.client("sagemaker-runtime",
                                                **sagemaker_runtime_config)
    else:
        sm_runtime_client = None

    return Session(
        boto_session=boto_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime_client,
    )
Exemple #9
0
    def __init__(self,
                 model_data,
                 role=None,
                 spark_version=2.4,
                 sagemaker_session=None,
                 **kwargs):
        """Initialize a SparkMLModel.

        Args:
            model_data (str): The S3 location of a SageMaker model data
                ``.tar.gz`` file. For SparkML, this will be the output that has
                been produced by the Spark job after serializing the Model via
                MLeap.
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            spark_version (str or float): Spark version you want to use for
                executing the inference (default: 2.4). Non-string values are
                converted with ``str()`` before the image lookup.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, the estimator creates one
                using the default AWS configuration chain. For local mode,
                please do not pass this variable.
            **kwargs: Additional parameters passed to the
                :class:`~sagemaker.model.Model` constructor.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.model.Model`.
        """
        # For local mode, sagemaker_session should be passed as None but we need a session to get
        # boto_region_name
        region_name = (sagemaker_session or Session()).boto_region_name

        # The documented type of ``spark_version`` is a string, but the default
        # (and possibly callers) use a float such as 2.4. Normalise it so the
        # image lookup always receives a string; ``None`` is passed through.
        version = spark_version if spark_version is None else str(spark_version)

        image_uri = image_uris.retrieve(framework_name,
                                        region_name,
                                        version=version)
        super(SparkMLModel, self).__init__(
            image_uri,
            model_data,
            role,
            predictor_cls=SparkMLPredictor,
            sagemaker_session=sagemaker_session,
            **kwargs,
        )
Exemple #10
0
def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config,
                      boto_config):
    """Build a ``Session`` from a boto config and per-client configs.

    A ``boto3.Session`` is created from ``boto_config`` when it is truthy,
    otherwise one pinned to ``DEFAULT_REGION``. ``sagemaker_client_config`` is
    mutated to carry a default ``'config'`` entry with ``max_attempts=10``
    retries. Each client is created only when its config mapping is truthy.
    """
    if boto_config:
        boto_session = boto3.Session(**boto_config)
    else:
        boto_session = boto3.Session(region_name=DEFAULT_REGION)

    sagemaker_client_config.setdefault(
        'config', Config(retries={'max_attempts': 10}))

    if sagemaker_client_config:
        sm_client = boto_session.client('sagemaker', **sagemaker_client_config)
    else:
        sm_client = None

    if sagemaker_runtime_config:
        sm_runtime_client = boto_session.client('sagemaker-runtime',
                                                **sagemaker_runtime_config)
    else:
        sm_runtime_client = None

    return Session(
        boto_session=boto_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime_client,
    )
    def __init__(self, endpoint, sagemaker_session=None):
        """Initialize a SparkMLPredictor.

        Intended for use with SparkMLModel to perform predictions against
        SparkML models serialized via MLeap. Requests are serialized with
        ``csv_serializer`` and sent with the ``CONTENT_TYPE_CSV`` content type.

        Args:
            endpoint (str): The name of the endpoint to perform inference on.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified (or falsy), a new
                ``Session()`` is created.
        """
        if not sagemaker_session:
            sagemaker_session = Session()

        super(SparkMLPredictor, self).__init__(
            endpoint=endpoint,
            sagemaker_session=sagemaker_session,
            serializer=csv_serializer,
            content_type=CONTENT_TYPE_CSV,
        )
Exemple #12
0
def sagemaker_session_with_custom_bucket(boto_session, sagemaker_client_config,
                                         sagemaker_runtime_config,
                                         custom_bucket_name):
    """Build a ``Session`` whose default bucket is ``custom_bucket_name``.

    ``sagemaker_client_config`` is mutated to carry a default ``"config"``
    entry with ``max_attempts=10`` retries. Each client is created only when
    its config mapping is truthy, otherwise ``None`` is passed to ``Session``.
    """
    sagemaker_client_config.setdefault(
        "config", Config(retries={"max_attempts": 10}))

    if sagemaker_client_config:
        sm_client = boto_session.client("sagemaker", **sagemaker_client_config)
    else:
        sm_client = None

    if sagemaker_runtime_config:
        sm_runtime_client = boto_session.client("sagemaker-runtime",
                                                **sagemaker_runtime_config)
    else:
        sm_runtime_client = None

    return Session(
        boto_session=boto_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime_client,
        default_bucket=custom_bucket_name,
    )
def test_sagemaker_session_does_not_create_bucket_on_init(
        sagemaker_client_config, sagemaker_runtime_config, boto_session):
    """Constructing a ``Session`` with a custom default bucket must not create that bucket."""
    sagemaker_client_config.setdefault(
        "config", Config(retries={"max_attempts": 10}))

    if sagemaker_client_config:
        sm_client = boto_session.client("sagemaker", **sagemaker_client_config)
    else:
        sm_client = None

    if sagemaker_runtime_config:
        sm_runtime_client = boto_session.client("sagemaker-runtime",
                                                **sagemaker_runtime_config)
    else:
        sm_runtime_client = None

    # Only construction is under test; the session object itself is discarded.
    Session(
        boto_session=boto_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime_client,
        default_bucket=CUSTOM_BUCKET_NAME,
    )

    s3_resource = boto3.resource("s3", region_name=boto_session.region_name)
    custom_bucket = s3_resource.Bucket(CUSTOM_BUCKET_NAME)
    assert custom_bucket.creation_date is None
def upload(src, dst, gz, session: sagemaker.Session, root='.'):
    """Upload a local file or directory to S3.

    Args:
        src: Local path of the file or directory to upload.
        dst: Destination. It is first resolved through ``cli_argument``; if the
            result does not start with ``s3://`` it is treated as a key inside
            the session's default bucket (one leading ``/`` is dropped).
        gz: If true, ``src`` must be a directory; it is packed into a gzipped
            tarball which is uploaded as a single object. ``dst`` must then end
            in ``.tar.gz`` or ``.tgz`` (case-insensitive).
        session: SageMaker session providing the boto session and the default
            bucket.
        root: Archive name under which ``src`` is stored inside the tarball
            (only used when ``gz`` is true).

    Raises:
        click.UsageError: If ``src`` does not exist, is neither a file nor a
            directory, ``gz`` is used with a file, or the ``gz`` destination
            has the wrong extension.
    """
    dst = cli_argument(dst, session=session)
    if not os.path.exists(src):
        raise click.UsageError("Source must exist")

    # Relative destinations are placed inside the session's default bucket.
    if not dst.startswith('s3://'):
        if dst.startswith('/'):
            dst = dst[1:]
        bucket = session.default_bucket()
        dst = 's3://{}/{}'.format(bucket, dst)

    url = urlparse(dst)
    assert url.scheme == 's3'
    bucket = url.netloc
    key = url.path
    if key.startswith('/'):
        key = key[1:]

    if os.path.isfile(src):
        if gz:
            raise click.UsageError(
                "Option gz is only valid for source directories")
        s3 = session.boto_session.client('s3')
        s3.upload_file(src, bucket, key)
    elif os.path.isdir(src):
        if gz:
            # The previous pattern "(tar\.gz||tgz)" contained an empty
            # alternative, so any destination ending in "." was accepted.
            if not re.match(r".*\.(tar\.gz|tgz)$", dst, re.IGNORECASE):
                raise click.UsageError(
                    "Destination should end in .tar.gz or tgz")
            file_name = os.path.basename(dst)
            with _tmpdir() as tmp:
                # Build the archive locally, then upload it as one object.
                archive_path = os.path.join(tmp, file_name)
                with tarfile.open(archive_path, 'w:gz') as arc:
                    arc.add(name=src, arcname=root, recursive=True)
                s3 = session.boto_session.client('s3')
                s3.upload_file(archive_path, bucket, key)
        else:
            S3Uploader.upload(local_path=src,
                              desired_s3_uri=dst,
                              sagemaker_session=session)
    else:
        raise click.UsageError("Source must be file or directory")
def create_clarify_bias_job(event):
    """Start a SageMaker Clarify bias analysis job described by ``event``.

    All settings are read from ``event["Input"]["Payload"]`` (sections
    ``security-config``, ``workspace-config``, ``data-config``,
    ``model-config``, ``automl-config`` and ``bias-analysis-config``).
    The job is started with ``wait=False`` and ``logs=False``; nothing is
    returned.
    """
    role = event["Input"]["Payload"]["security-config"]["iam_role"]
    ws_params = event["Input"]["Payload"]["workspace-config"]
    # Not used below; still read so a missing section fails at this point.
    data_params = event["Input"]["Payload"]["data-config"]
    model_params = event["Input"]["Payload"]["model-config"]
    automl_params = event["Input"]["Payload"]["automl-config"]
    bias_analysis_params = event["Input"]["Payload"]["bias-analysis-config"]

    # This is a temporary workaround. The bias detection job behaves differently when
    # files are split. Remove when the bug is fixed.
    merged_files_dst = "s3://{}/{}/{}".format(
        ws_params["s3_bucket"], ws_params["s3_prefix"], "data/merged")
    input_path = create_merged_dataset(
        automl_params["data_uri"], merged_files_dst, automl_params["target_name"])
    # End of workaround.

    session = Session()
    clarify_processor = clarify.SageMakerClarifyProcessor(
        role=role,
        instance_count=bias_analysis_params["instance_count"],
        instance_type=bias_analysis_params["instance_type"],
        sagemaker_session=session,
    )

    output_uri = "s3://{}/{}/{}".format(
        ws_params["s3_bucket"],
        ws_params["s3_prefix"],
        bias_analysis_params["output_prefix"],
    )

    bias_data_config = clarify.DataConfig(
        s3_data_input_path=input_path,
        s3_output_path=output_uri,
        label=automl_params["target_name"],
        headers=get_data_columns(automl_params["data_uri"]),
        dataset_type='text/csv',
    )

    model_config = clarify.ModelConfig(
        model_name=model_params["model_name"],
        instance_type=model_params["instance_type"],
        instance_count=model_params["instance_count"],
        accept_type='text/csv',
        content_type='text/csv',
    )

    pred_params = bias_analysis_params["prediction-config"]
    predictions_config = clarify.ModelPredictedLabelConfig(
        label=pred_params["label"],
        probability=pred_params["probability"],
        probability_threshold=pred_params["probability_threshold"],
        label_headers=pred_params["label_headers"],
    )

    bias_params = bias_analysis_params["bias-config"]
    bias_config = clarify.BiasConfig(
        label_values_or_threshold=bias_params["label_values_or_threshold"],
        facet_name=bias_params["facet_name"],
        facet_values_or_threshold=bias_params["facet_values_or_threshold"],
        group_name=bias_params["group_name"],
    )

    # Fire-and-forget: neither wait for completion nor stream logs.
    clarify_processor.run_bias(
        job_name=bias_analysis_params["job_name"],
        data_config=bias_data_config,
        bias_config=bias_config,
        model_config=model_config,
        model_predicted_label_config=predictions_config,
        pre_training_methods='all',
        post_training_methods='all',
        wait=False,
        logs=False,
    )
def test_get_caller_identity_arn_from_an_user(boto_session):
    """The caller identity ARN of an IAM user is reported as-is."""
    session = Session(boto_session)
    sts_client = session.boto_session.client('sts')
    sts_client.get_caller_identity.return_value = {'Arn': 'arn:aws:iam::369233609183:user/mia'}

    resolved = session.get_caller_identity_arn()
    assert resolved == 'arn:aws:iam::369233609183:user/mia'
Exemple #17
0
def test_delete_endpoint(boto_session):
    """``Session.delete_endpoint`` forwards the name as ``EndpointName`` to the boto client."""
    session = Session(boto_session)
    session.delete_endpoint('my_endpoint')

    client_mock = boto_session.client()
    client_mock.delete_endpoint.assert_called_with(EndpointName='my_endpoint')
Exemple #18
0
 def _status(self):
     """Return ``_process_latest`` applied to a fresh session's ``describe_processing_job``."""
     describe_job = Session().describe_processing_job
     return self._process_latest(describe_job)
Exemple #19
0
 def _delete(self):
     """Apply ``stop_processing_job`` via ``_process_latest``, then return the describe result.

     A separate ``Session()`` is created for each of the two calls.
     """
     stop_job = Session().stop_processing_job
     self._process_latest(stop_job)
     describe_job = Session().describe_processing_job
     return self._process_latest(describe_job)
Exemple #20
0
from __future__ import absolute_import

import itertools
import json
import os
import shutil
import subprocess

import click
import pandas as pd
from sagemaker import Session
from sagemaker.tensorflow import TensorFlow

# Directory containing this script.
dir_path = os.path.dirname(os.path.realpath(__file__))
# S3 URI for benchmark results inside the session's default bucket.
# Built with string formatting rather than os.path.join: S3 URIs always use
# "/" as the separator, whereas os.path.join uses the host OS separator.
benchmark_results_dir = "s3://{}/hvd-benchmarking".format(
    Session().default_bucket())


@click.group()
def cli():
    # Empty click command group; the body intentionally does nothing.
    # A comment is used instead of a docstring because click would surface a
    # docstring as the group's help text.
    pass


def generate_report():
    results_dir = os.path.join(dir_path, "results")

    if os.path.exists(results_dir):
        shutil.rmtree(results_dir)

    subprocess.call(
Exemple #21
0
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

from __future__ import absolute_import

import argparse
import itertools
import os

from sagemaker import Session
from sagemaker.estimator import Framework
from sagemaker.tensorflow import TensorFlow

# Bound method (not called here): invoking ``default_bucket()`` later yields
# the bucket of the session created at import time.
default_bucket = Session().default_bucket
# Directory containing this script.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Baseline hyperparameter values defined for this script.
_DEFAULT_HYPERPARAMETERS = {
    "batch_size": 32,
    "model": "resnet32",
    "num_epochs": 10,
    "data_format": "NHWC",
    "summary_verbosity": 1,
    "save_summaries_steps": 10,
    "data_name": "cifar10",
}


class ScriptModeTensorFlow(Framework):
    """This class is temporary until the final version of Script Mode is released.
Exemple #22
0
    def __init__(
        self,
        entry_point="train.py",
        source_dir=None,
        hyperparameters=None,
        py_version="py3",
        framework_version={
            "transformers": "4.1.1",
            "datasets": "1.1.3"
        },
        image_name=None,
        distributions=None,
        **kwargs,
    ):
        """Initialize a HuggingFace estimator.

        Args:
            entry_point (str): Training entry-point script, forwarded to the
                parent constructor.
            source_dir (str): Source directory, forwarded to the parent
                constructor.
            hyperparameters (dict): Hyperparameters, forwarded to the parent
                constructor.
            py_version (str): Python version, validated together with
                ``framework_version``.
            framework_version (dict): Versions of ``transformers`` and
                ``datasets`` to use. A dict argument is copied before being
                stored on the instance.
            image_name: Currently unused by this constructor.
            distributions: Currently unused by this constructor.
            **kwargs: Forwarded to the parent constructor. ``instance_type``
                (default ``"local"``) and ``sagemaker_session`` (default: a new
                ``Session()``) are also read here.
        """
        # The default ``framework_version`` dict is created once at function
        # definition time and shared by every call. Store a copy so that
        # mutating one instance's versions cannot leak into the shared default
        # (or into a dict owned by the caller).
        if isinstance(framework_version, dict):
            framework_version = framework_version.copy()

        # validating framework_version and python version
        self.framework_version = framework_version
        self.py_version = py_version
        validate_version_or_image_args(self.framework_version, self.py_version)

        # checking for instance_type
        self.instance_type = kwargs.get("instance_type", "local")

        # build ecr_uri
        self.image_uri = self._get_container_image("training")

        # using or create a sagemaker session
        if "sagemaker_session" in kwargs:
            self.sagemaker_session = kwargs["sagemaker_session"]
        else:
            self.sagemaker_session = Session()

        # TODO: distributed-training validation and defaulting of
        # ``enable_sagemaker_metrics`` are not implemented yet.

        super(HuggingFace, self).__init__(entry_point,
                                          source_dir,
                                          hyperparameters,
                                          image_uri=self.image_uri,
                                          **kwargs)
def n_virginia_sagemaker_session(n_virginia_region):
    """Return a ``Session`` backed by a boto3 session in ``n_virginia_region``."""
    regional_boto_session = boto3.Session(region_name=n_virginia_region)
    return Session(boto_session=regional_boto_session)
def sagemaker_session():
    """Return a ``Session`` backed by a boto3 session in ``REGION``."""
    regional_boto_session = boto3.Session(region_name=REGION)
    return Session(boto_session=regional_boto_session)
    for array in np.array_split(dataset[:, :-1], 100):
        result = model.predict(array)
        predictions += [r['predicted_label'] for r in result['predictions']]

    predictions = np.array(predictions)

    return deo_from_list(dataset, predictions, groupA_idxs, groupB_idxs)


# ## Prerequisites and Data <a class="anchor" id="pre_and_data">
# ### Initialize SageMaker  <a class="anchor" id="initsagemaker">

# In[5]:

from sagemaker import Session
# Bucket name taken from the SageMaker session's default bucket.
bucket = Session().default_bucket()  #'fairness-test2'
# Prefix string for this demo -- presumably the S3 key prefix used under
# `bucket` by later cells; confirm against its usage.
prefix = 'sagemaker/DEMO-linear-adult'

# Define IAM role
from sagemaker import get_execution_role
import pandas as pd
import numpy as np
import urllib
import os
import sklearn.preprocessing as preprocessing
import seaborn as sns

# Value returned by get_execution_role() for the current environment.
role = get_execution_role()

# ### Download data <a class="anchor" id="download_data">
# Data Source: [https://archive.ics.uci.edu/ml/machine-learning-databases/adult/](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/)
def model_create(job,
                 model_artifact,
                 name,
                 session: sagemaker.Session,
                 inference_image,
                 inference_image_path,
                 inference_image_accounts,
                 role,
                 force,
                 multimodel=False,
                 accelerator_type=None):
    """Create a SageMaker model from a training job or from an S3 artifact.

    Exactly one of ``job`` and ``model_artifact`` must be provided.

    Args:
        job: Training job name. Its ``ModelArtifacts.S3ModelArtifacts`` field,
            obtained through ``training_describe``, is used as model data.
        model_artifact: Model data location. If it does not start with
            ``s3://`` it is treated as a key in the session's default bucket
            (one leading ``/`` is dropped).
        name: Model name. Defaults to ``job`` when a job is given; required
            when ``model_artifact`` is given.
        session: SageMaker session used for all AWS calls.
        inference_image: Tag of the inference image.
        inference_image_path: Path of the inference image.
        inference_image_accounts: Accounts passed to the ``Image`` config.
        role: Role name handed to ``ensure_inference_role``.
        force: If true, an existing model with the same name is deleted first.
        multimodel: If true, the container runs in ``MultiModel`` mode,
            otherwise ``SingleModel``.
        accelerator_type: Currently unused by this function.

    Raises:
        click.UsageError: On an invalid combination of ``job``,
            ``model_artifact`` and ``name``, or when the model already exists
            and ``force`` is not set.
    """
    job = cli_argument(job, session=session)
    name = cli_argument(name, session=session)
    model_artifact = cli_argument(model_artifact, session=session)

    # Validate arguments before any image or role work is attempted, so usage
    # errors are reported without side effects.
    if bool(job) == bool(model_artifact):
        raise click.UsageError('Specify one of job_name or model_artifact')
    if model_artifact and not name:
        raise click.UsageError('name is required if job is not provided')

    image_config = Image(tag=inference_image,
                         path=inference_image_path,
                         accounts=inference_image_accounts)
    image_uri = ecr_ensure_image(image=image_config,
                                 session=session.boto_session)

    iam = session.boto_session.client('iam')
    client = session.boto_session.client('sagemaker')
    role = ensure_inference_role(iam=iam, role_name=role)

    if job:
        model_artifact = training_describe(
            job_name=job,
            field='ModelArtifacts.S3ModelArtifacts',
            session=session)
        if not name:
            name = job
        print("Creating model [{}] from job [{}] artifact [{}]".format(
            name, job, model_artifact))
    else:
        # Relative artifacts are resolved inside the session's default bucket.
        if not model_artifact.startswith('s3://'):
            if model_artifact.startswith('/'):
                model_artifact = model_artifact[1:]
            bucket = session.default_bucket()
            model_artifact = 's3://{}/{}'.format(bucket, model_artifact)
        print("Creating model [{}] from artifact [{}]".format(
            name, model_artifact))

    if model_exists(name=name, client=client):
        if force:
            print("Deleting existing model")
            model_delete(name=name, client=client)
        else:
            raise click.UsageError('Specify force if overwriting model')

    model = sagemaker.Model(
        image_uri=image_uri,
        model_data=model_artifact,
        role=role,
        predictor_cls=None,
        env=None,
        name=name,
        sagemaker_session=session,
    )
    # The container definition corresponds to the ``PrimaryContainer``
    # argument of the SageMaker ``create_model`` API (Image, Mode,
    # ModelDataUrl, Environment).
    container_def = sagemaker.container_def(
        model.image_uri,
        model.model_data,
        model.env,
        container_mode='MultiModel' if multimodel else 'SingleModel')

    enable_network_isolation = model.enable_network_isolation()

    session.create_model(
        model.name,
        model.role,
        container_def,
        vpc_config=model.vpc_config,
        enable_network_isolation=enable_network_isolation,
    )
Exemple #27
0
def sagemaker_session():
    """Return a ``Session`` created with default arguments."""
    return Session()
def test_delete_endpoint_config(boto_session):
    """``Session.delete_endpoint_config`` forwards the name as ``EndpointConfigName``."""
    session = Session(boto_session)
    session.delete_endpoint_config('my_endpoint_config')

    client_mock = boto_session.client()
    client_mock.delete_endpoint_config.assert_called_with(
        EndpointConfigName='my_endpoint_config')
def test_delete_endpoint(boto_session):
    """``Session.delete_endpoint`` forwards the name as ``EndpointName`` to the boto client."""
    session = Session(boto_session)
    session.delete_endpoint('my_endpoint')

    client_mock = boto_session.client()
    client_mock.delete_endpoint.assert_called_with(EndpointName='my_endpoint')
    def __init__(
        self,
        entry_point="train.py",
        source_dir: Optional[str] = None,
        hyperparameters: Optional[Dict[str, str]] = None,
        framework_version={"transformers": "4.1.1", "datasets": "1.1.3"},
        image_uri: Optional[str] = None,
        huggingface_token: Optional[str] = None,
        # distribution=None,
        **kwargs,
    ):
        """
        Args:
            entry_point (str): Path (absolute or relative) to the Python source
                file which should be executed as the entry point to training.
                If ``source_dir`` is specified, then ``entry_point``
                must point to a file located at the root of ``source_dir``.
            source_dir (str): Path (absolute, relative or an S3 URI) to a directory
                with any other training source code dependencies aside from the entry
                point file (default: None). If ``source_dir`` is an S3 URI, it must
                point to a tar.gz file. Structure within this directory is preserved
                when training on Amazon SageMaker.
            hyperparameters (dict): Hyperparameters that will be used for
                training (default: None). The hyperparameters are made
                accessible as a dict[str, str] to the training code on
                SageMaker. For convenience, this accepts other types for keys
                and values, but ``str()`` will be called to convert them before
                training.
            framework_version (dict): Transformers and datasets versions you want to use for
                executing your model training code. Defaults to
                ``{"transformers": "4.1.1", "datasets": "1.1.3"}``. Required unless
                ``image_uri`` is provided. A dict argument is copied before being stored
                on the instance. List of supported versions:
                https://github.com/aws/sagemaker-python-sdk#pytorch-sagemaker-estimators.
            image_uri (str): If specified, the estimator will use this image
                for training and hosting, instead of selecting the appropriate
                SageMaker official image based on framework_version and
                py_version. It can be an ECR url or dockerhub image and tag.
                Examples:
                    * ``123412341234.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0``
                    * ``custom-image:latest``
                If ``framework_version`` or ``py_version`` are ``None``, then
                ``image_uri`` is required. If also ``None``, then a ``ValueError``
                will be raised.
                NOTE(review): this constructor does not read the ``image_uri``
                argument; the image is always taken from
                ``self._get_container_image("training")`` -- confirm intent.
            huggingface_token (str): HuggingFace Hub authentication token for uploading your model files.
                You can get this by either using the [transformers-cli](https://huggingface.co/transformers/model_sharing.html)
                with `transformers-cli login` or using the `login()` method of `transformers.hf_api`.
                If the HuggingFace Token is provided the model will be uploaded automatically to the
                model hub using the `base_job_name` as repository name.
            **kwargs: Forwarded to the parent constructor. ``instance_type``
                (default ``"local"``) and ``sagemaker_session`` (default: a new
                ``Session()``) are also read here.
        """
        # The default ``framework_version`` dict is created once at function
        # definition time and shared by every call. Store a copy so that
        # mutating one instance's versions cannot leak into the shared default
        # (or into a dict owned by the caller).
        if isinstance(framework_version, dict):
            framework_version = framework_version.copy()

        # validating framework_version and python version
        self.framework_version = framework_version
        self.py_version = "py3"
        validate_version_or_image_args(self.framework_version, self.py_version)

        # checking for instance_type
        self.instance_type = kwargs.get("instance_type", "local")

        # build ecr_uri
        self.image_uri = self._get_container_image("training")

        # using or create a sagemaker session
        if "sagemaker_session" in kwargs:
            self.sagemaker_session = kwargs["sagemaker_session"]
        else:
            self.sagemaker_session = Session()

        super(HuggingFace, self).__init__(entry_point, source_dir, hyperparameters, image_uri=self.image_uri, **kwargs)

        # NOTE: ``self.huggingface_token`` is only set when a token was given.
        if huggingface_token:
            logger.info(
                f"estimator initialized with HuggingFace Token, model will be uploaded to hub using the {self.base_job_name} as repostiory name"
            )
            self.huggingface_token = huggingface_token
Exemple #31
0
# Once this is complete, click on **Update Trust Policy** and you are done.

# ## 1.2 Setup S3 bucket
#
# First, we need to set up an S3 bucket within your account and upload the necessary files to it. To set up the bucket, run the first code block, labeled Setup S3 bucket. To run a cell while it is selected, either press Shift and Return at the same time or select the Run button at the top of the Jupyter notebook.

# In[2]:

import boto3
import botocore
from botocore.exceptions import ClientError

from sagemaker import Session as Sess

# SageMaker session, constructed with default arguments
sess = Sess()

# Boto3 session; the S3 resource, the STS lookup and the region below all come from it
session = boto3.session.Session()

# S3 resource handle obtained from the boto3 session
s3 = session.resource('s3')
# 'Account' field of the STS get_caller_identity response for the current credentials
account = session.client('sts').get_caller_identity()['Account']
# Region name configured on the boto3 session (also decides how the bucket is created below)
region = session.region_name
# Bucket name is built from the literal "test" plus the region.
# NOTE(review): `account` is not part of the name here — confirm that is intended.
bucket_name = 'sagemaker-glue-process-{}-{}'.format("test", region)

try:
    if region == 'us-east-1':
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
Exemple #32
0
 def _logs(self):
     """Return ``self._process_latest`` applied to a new session's ``logs_for_job``."""
     # A fresh default Session is created on every call; only its bound
     # ``logs_for_job`` method is handed on.
     fetch_job_logs = Session().logs_for_job
     return self._process_latest(fetch_job_logs)
    def __init__(
        self,
        framework_version,
        role,
        instance_type,
        instance_count,
        command=None,
        volume_size_in_gb=30,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        base_job_name=None,
        sagemaker_session=None,
        env=None,
        tags=None,
        network_config=None,
    ):
        """Create an ``SKLearnProcessor`` for scikit-learn processing jobs.

        The container image is resolved with ``image_uris.retrieve`` from the
        scikit-learn framework name, the session's region, ``framework_version``
        and ``instance_type``. All remaining arguments are forwarded unchanged
        to the parent constructor.

        Args:
            framework_version (str): The version of scikit-learn; used to
                look up the container image.
            role (str): An AWS IAM role name or ARN, forwarded to the parent
                processor.
            instance_type (str): Type of EC2 instance to use for processing,
                for example, 'ml.c4.xlarge'. Also used for the image lookup.
            instance_count (int): The number of instances to run the
                Processing job with.
            command ([str]): The command to run, along with any command-line
                flags, e.g. ["python3", "-v"]. When omitted or empty,
                ["python3"] is used.
            volume_size_in_gb (int): Size in GB of the EBS volume to use for
                storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the processing volume.
            output_kms_key (str): The KMS key id for all ProcessingOutputs.
            max_runtime_in_seconds (int): Timeout in seconds, after which
                Amazon SageMaker terminates the job regardless of its status.
            base_job_name (str): Prefix for the processing job name.
            sagemaker_session (sagemaker.session.Session): Session object used
                for interactions with Amazon SageMaker and other AWS services.
                When omitted (or falsy), a new ``Session()`` is created here
                and passed to the parent.
            env (dict): Environment variables to be passed to the processing job.
            tags ([dict]): List of tags to be passed to the processing job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
        """
        # Fall back to the plain Python 3 interpreter when no command is given.
        command = command if command else ["python3"]

        # The same session supplies the region for the image lookup and is
        # handed to the parent, so both always agree.
        active_session = sagemaker_session if sagemaker_session else Session()
        aws_region = active_session.boto_region_name

        sklearn_image_uri = image_uris.retrieve(
            defaults.SKLEARN_NAME,
            aws_region,
            version=framework_version,
            instance_type=instance_type,
        )

        parent_kwargs = dict(
            role=role,
            image_uri=sklearn_image_uri,
            instance_count=instance_count,
            instance_type=instance_type,
            command=command,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            base_job_name=base_job_name,
            sagemaker_session=active_session,
            env=env,
            tags=tags,
            network_config=network_config,
        )
        super(SKLearnProcessor, self).__init__(**parent_kwargs)
Exemple #34
0
from json import dumps
from scipy.sparse import lil_matrix
from sagemaker import Session
from sagemaker.predictor import RealTimePredictor, json_deserializer

# Module-level SageMaker session, created once with default arguments when this module is loaded.
sm_sess = Session()


def build_spare_matrix_payload(movie_id):
    """Build the dense one-row feature vector for a given movie.

    The row has 2625 columns (nb_features). Two entries are set to 1: the
    fixed column 944 and the column ``943 + int(movie_id)``, where 943 is
    nb_users_train. Every other entry is 0.

    Args:
        movie_id: Movie identifier; any value accepted by ``int()``.

    Returns:
        A ``float32`` array of shape ``(1, 2625)``.
    """
    nb_users_train = 943
    nb_features = 2625

    # Movie columns are offset by the number of training users.
    movie_column = nb_users_train + int(movie_id)

    row = lil_matrix((1, nb_features), dtype='float32')
    for column in (944, movie_column):
        row[0, column] = 1

    # The matrix has a single row, so densifying it yields the (1, 2625) payload.
    return row.toarray()


def post_process(result):
    score = result["predictions"][0]["score"]
    pp_score = round(score * 100, 2)
    predicted_label = result["predictions"][0]["predicted_label"]

    if predicted_label == 1:
        txt = "Chance that you will like this: " + str(pp_score)
    else:
        txt = "Chance that you will like this: " + str(round(
Exemple #35
0
def sagemaker_session(region):
    """Return a SageMaker ``Session`` wrapping a boto3 session for ``region``."""
    regional_boto_session = boto3.Session(region_name=region)
    return Session(boto_session=regional_boto_session)
from __future__ import absolute_import

import itertools
import json
import os
import shutil
import subprocess

import click
import pandas as pd
from sagemaker import Session
from sagemaker.tensorflow import TensorFlow

# Directory that contains this script (symlinks resolved).
dir_path = os.path.dirname(os.path.realpath(__file__))
# S3 prefix under the session's default bucket where benchmark results live.
# S3 URIs always use forward slashes, so the URI is built with string
# formatting rather than os.path.join, which would insert the platform
# separator (a backslash on Windows). On POSIX the value is unchanged.
benchmark_results_dir = 's3://{}/hvd-benchmarking'.format(Session().default_bucket())


# Root click command group for this script; the body is empty (`pass`) and the
# function exists only to be registered as a group via the decorator.
# NOTE(review): documented with comments rather than a docstring, because click
# would surface a docstring as the group's help text.
@click.group()
def cli():
    pass


def generate_report():
    results_dir = os.path.join(dir_path, 'results')

    if os.path.exists(results_dir):
        shutil.rmtree(results_dir)

    subprocess.call(['aws', 's3', 'cp', '--recursive', benchmark_results_dir, results_dir])