Ejemplo n.º 1
0
    def deploy(self,
               initial_instance_count,
               instance_type,
               endpoint_name=None,
               tags=None,
               wait=True):
        """Deploy this ``Model`` to a SageMaker ``Endpoint``.

        Creates a SageMaker ``Model`` and ``EndpointConfig``, then deploys an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not
        None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        After this call returns, the created model name is available in the
        ``name`` field of this ``Model`` and the created endpoint name in its
        ``endpoint_name`` field.

        Args:
            initial_instance_count (int): The initial number of instances to
                run in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model
                to. For example, 'ml.p2.xlarge'.
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be
                created.
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            wait (bool): Whether the call should wait until the deployment of
                the model completes (default: True).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation
            of ``self.predictor_cls`` on the created endpoint name if
            ``self.predictor_cls`` is not None; otherwise None.
        """
        if not self.sagemaker_session:
            self.sagemaker_session = Session()

        container_defs = self.pipeline_container_def(instance_type)

        # Derive a model name from the first container image when none is set.
        if not self.name:
            self.name = name_from_image(container_defs[0]["Image"])
        self.sagemaker_session.create_model(self.name,
                                            self.role,
                                            container_defs,
                                            vpc_config=self.vpc_config)

        variant = sagemaker.production_variant(self.name, instance_type,
                                               initial_instance_count)
        self.endpoint_name = endpoint_name if endpoint_name else self.name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [variant], tags, wait=wait)
        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name,
                                      self.sagemaker_session)
        return None
Ejemplo n.º 2
0
    def upload(local_path,
               desired_s3_uri,
               kms_key=None,
               sagemaker_session=None):
        """Static method that uploads a given file or directory to S3.

        Args:
            local_path (str): Path (absolute or relative) of the local file
                or directory to upload.
            desired_s3_uri (str): The desired S3 location to upload to. It is
                the prefix to which the local filename will be added.
            kms_key (str): The KMS key to use to encrypt the files.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, one is created
                using the default AWS configuration chain.

        Returns:
            The S3 uri of the uploaded file(s).
        """
        session = sagemaker_session or Session()
        bucket, key_prefix = parse_s3_url(url=desired_s3_uri)

        # Server-side-encryption arguments are only passed when a key is
        # provided.
        extra_args = None
        if kms_key is not None:
            extra_args = {
                "SSEKMSKeyId": kms_key,
                "ServerSideEncryption": "aws:kms",
            }

        return session.upload_data(path=local_path,
                                   bucket=bucket,
                                   key_prefix=key_prefix,
                                   extra_args=extra_args)
Ejemplo n.º 3
0
    def attach(cls,
               tuning_job_name,
               sagemaker_session=None,
               job_details=None,
               estimator_cls=None):
        """Attach an existing hyperparameter tuning job to a new tuner.

        Args:
            tuning_job_name (str): Name of the tuning job to attach to.
            sagemaker_session (sagemaker.session.Session): Session object
                used for SageMaker API calls. If not specified, one is
                created using the default AWS configuration chain.
            job_details (dict): Optional response of
                ``DescribeHyperParameterTuningJob`` for this job; fetched
                from the SageMaker API when None.
            estimator_cls: Optional estimator class hint forwarded to
                ``_prepare_estimator_cls``.

        Returns:
            An instance of the calling class with the tuning job attached.
        """
        session = sagemaker_session or Session()

        if job_details is None:
            job_details = session.sagemaker_client \
                .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)

        training_def = job_details['TrainingJobDefinition']
        estimator_cls = cls._prepare_estimator_cls(estimator_cls, training_def)
        estimator = cls._prepare_estimator_from_job_description(
            estimator_cls, training_def, session)
        init_params = cls._prepare_init_params_from_job_description(job_details)

        tuner = cls(estimator=estimator, **init_params)
        tuner.latest_tuning_job = _TuningJob(
            sagemaker_session=session,
            tuning_job_name=tuning_job_name)

        return tuner
Ejemplo n.º 4
0
    def upload_string_as_file_body(body,
                                   desired_s3_uri=None,
                                   kms_key=None,
                                   sagemaker_session=None):
        """Static method that uploads a string as the body of an S3 object.

        Args:
            body (str): String representing the body of the file.
            desired_s3_uri (str): The desired S3 uri to upload to.
            kms_key (str): The KMS key to use to encrypt the file.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, one is created
                using the default AWS configuration chain.

        Returns:
            str: The S3 uri of the uploaded file.
        """
        session = sagemaker_session or Session()
        bucket, key = parse_s3_url(desired_s3_uri)

        session.upload_string_as_file_body(body=body,
                                           bucket=bucket,
                                           key=key,
                                           kms_key=kms_key)

        return desired_s3_uri
Ejemplo n.º 5
0
    def __init__(self,
                 training_job_name,
                 metric_names=None,
                 sagemaker_session=None,
                 start_time=None,
                 end_time=None,
                 period=None):
        """Initialize a ``TrainingJobAnalytics`` instance.

        Args:
            training_job_name (str): name of the TrainingJob to analyze.
            metric_names (list, optional): string names of all the metrics to collect for this training job.
                If not specified, then it will use all metric names configured for this job.
            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
                Amazon SageMaker APIs and any other AWS services needed. If not specified, one is created
                using the default AWS configuration chain.
            start_time: start of the time range for metrics (default: None).
                Stored as-is here; presumably bounds the CloudWatch query — confirm in the query code.
            end_time: end of the time range for metrics (default: None). Stored as-is here.
            period: sampling period for the metrics, falling back to ``METRICS_PERIOD_DEFAULT``
                when not specified (or falsy).
        """
        sagemaker_session = sagemaker_session or Session()
        # Clients used by subsequent metric queries: SageMaker for job
        # metadata, CloudWatch for the metric data points.
        self._sage_client = sagemaker_session.sagemaker_client
        self._cloudwatch = sagemaker_session.boto_session.client('cloudwatch')
        self._training_job_name = training_job_name
        self._start_time = start_time
        self._end_time = end_time
        self._period = period or METRICS_PERIOD_DEFAULT

        # Default to every metric name configured for this training job when
        # the caller does not restrict the set.
        if metric_names:
            self._metric_names = metric_names
        else:
            self._metric_names = self._metric_names_for_training_job()
        self.clear_cache()
Ejemplo n.º 6
0
    def __init__(self,
                 endpoint,
                 sagemaker_session=None,
                 serializer=None,
                 deserializer=None,
                 content_type=None,
                 accept=None):
        """Initialize a ``RealTimePredictor``.

        Serialization of input data and deserialization of result data can be
        configured through the initializer arguments. If not specified, a
        sequence of bytes is expected and the API sends it in the request
        body without modifications; likewise the prediction result bytes are
        returned unmodified.

        Args:
            endpoint (str): Name of the Amazon SageMaker endpoint to which
                requests are sent.
            sagemaker_session (sagemaker.session.Session): A SageMaker
                Session object used for SageMaker interactions (default:
                None). If not specified, one is created using the default AWS
                configuration chain.
            serializer (callable): Accepts a single argument, the input data,
                and returns a sequence of bytes. It may provide a
                ``content_type`` attribute that defines the endpoint request
                content type.
            deserializer (callable): Accepts two arguments, the result data
                and the response content type, and returns a sequence of
                bytes. It may provide an ``accept`` attribute that defines
                the endpoint response's "Accept" content type.
            content_type (str): The invocation's "ContentType", overriding
                any ``content_type`` from the serializer (default: None).
            accept (str): The invocation's "Accept", overriding any accept
                from the deserializer (default: None).
        """
        self.endpoint = endpoint
        self.sagemaker_session = sagemaker_session or Session()
        self.serializer = serializer
        self.deserializer = deserializer
        # Explicit arguments win; otherwise fall back to whatever the
        # (de)serializer objects advertise.
        if not content_type:
            content_type = getattr(serializer, 'content_type', None)
        self.content_type = content_type
        if not accept:
            accept = getattr(deserializer, 'accept', None)
        self.accept = accept
Ejemplo n.º 7
0
    def __init__(self,
                 role,
                 train_instance_count,
                 train_instance_type,
                 train_volume_size=30,
                 train_max_run=24 * 60 * 60,
                 input_mode='File',
                 output_path=None,
                 output_kms_key=None,
                 base_job_name=None,
                 sagemaker_session=None):
        """Initialize an ``EstimatorBase`` instance.

        Args:
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            train_instance_count (int): Number of Amazon EC2 instances to use
                for training.
            train_instance_type (str): Type of EC2 instance to use for
                training, for example, 'ml.c4.xlarge'. The special values
                'local' and 'local_gpu' select local-mode training.
            train_volume_size (int): Size in GB of the EBS volume to use for
                storing input data during training (default: 30). Must be
                large enough to store training data if File Mode is used
                (which is the default).
            train_max_run (int): Timeout in seconds for training
                (default: 24 * 60 * 60). After this amount of time Amazon
                SageMaker terminates the job regardless of its current status.
            input_mode (str): The input mode that the algorithm supports
                (default: 'File'). Valid modes: 'File' - Amazon SageMaker
                copies the training dataset from the S3 location to a local
                directory. 'Pipe' - Amazon SageMaker streams data directly
                from S3 to the container via a Unix-named pipe.
            output_path (str): S3 location for saving the training result
                (model artifacts and output files). If not specified, results
                are stored to a default bucket. If the bucket with the
                specific name does not exist, the estimator creates the
                bucket during the
                :meth:`~sagemaker.estimator.EstimatorBase.fit` method
                execution.
            output_kms_key (str): Optional. KMS key ID for encrypting the
                training output (default: None).
            base_job_name (str): Prefix for training job name when the
                :meth:`~sagemaker.estimator.EstimatorBase.fit` method
                launches. If not specified, the estimator generates a default
                job name based on the training image name and current
                timestamp.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, the estimator
                creates one using the default AWS configuration chain.

        Raises:
            RuntimeError: If 'local_gpu' is requested with more than one
                instance (distributed local GPU training is unsupported).
        """
        self.role = role
        self.train_instance_count = train_instance_count
        self.train_instance_type = train_instance_type
        self.train_volume_size = train_volume_size
        self.train_max_run = train_max_run
        self.input_mode = input_mode

        # Local-mode instance types need a LocalSession; everything else
        # gets a regular SageMaker Session.
        if train_instance_type in ('local', 'local_gpu'):
            if train_instance_type == 'local_gpu' and train_instance_count > 1:
                raise RuntimeError(
                    "Distributed Training in Local GPU is not supported")
            self.sagemaker_session = sagemaker_session or LocalSession()
        else:
            self.sagemaker_session = sagemaker_session or Session()

        self.base_job_name = base_job_name
        self._current_job_name = None
        self.output_path = output_path
        self.output_kms_key = output_kms_key
        self.latest_training_job = None
Ejemplo n.º 8
0
    def upload(local_path, desired_s3_uri, kms_key=None, session=None):
        """Static method that uploads a given file or directory to S3.

        Args:
            local_path (str): A local path to a file or directory.
            desired_s3_uri (str): The desired S3 uri to upload to.
            kms_key (str): The KMS key to use to encrypt the files.
            session (sagemaker.session.Session): Session object which manages
                interactions with Amazon SageMaker APIs and any other AWS
                services needed. If not specified, one is created using the
                default AWS configuration chain.

        Returns:
            The S3 uri of the uploaded file(s).
        """
        # The v2 SDK renamed this parameter; warn callers still using it.
        if session is not None:
            _session_v2_rename_warning(session)

        sagemaker_session = session if session else Session()
        bucket, key_prefix = parse_s3_url(url=desired_s3_uri)
        extra_args = {"SSEKMSKeyId": kms_key} if kms_key is not None else None

        return sagemaker_session.upload_data(
            path=local_path,
            bucket=bucket,
            key_prefix=key_prefix,
            extra_args=extra_args,
        )
    def _init_params(self, parent_data_objs):
        """Populate instance configuration from ``self._params`` and the parent data objects.

        Args:
            parent_data_objs: Unpacked into exactly three elements
                (training set, validation set, test set).
        """
        self._output_model_filepath = self._params['output_model_filepath']

        self._train_set, valid_set, test_set = parent_data_objs
        self._print_statistics_info(self._train_set, valid_set, test_set)

        # Feature count from the first element of the first training sample
        # -- assumes train_set[0][0] is a feature vector; TODO confirm.
        self._num_features = len(self._train_set[0][0])

        # Fall back to the SageMaker session's default bucket when no bucket
        # name is configured.
        self._bucket_name = self._params.get('bucket_name')
        if not self._bucket_name:
            self._bucket_name = Session().default_bucket()

        self._data_location = self._params.get('data_location')
        if not self._data_location:
            self._data_location = 'training/kmeans/data'

        # The output location is always rooted at the chosen bucket, whether
        # defaulted or user-supplied.
        self._output_location = self._params.get('output_location')
        if not self._output_location:
            self._output_location = 's3://{}/training/kmeans/output'.format(self._bucket_name)
        else:
            self._output_location = 's3://{}/{}'.format(self._bucket_name, self._output_location)

        self._skip_s3_dataset_uploading = str2bool(self._params.get('skip_s3_dataset_uploading'))

        # Training-job tuning knobs, each with a default applied.
        self._instance_count = self._params.get('instance_count', 1)
        self._instance_type = self._params.get('instance_type', 'ml.c4.xlarge')
        self._volume_size_in_gb = self._params.get('volume_size_in_gb', 50)
        self._hyper_parameter_k = self._params.get('hyper_parameter_k', 10)
        self._epochs = self._params.get('epochs', 1)
        self._mini_batch_size = self._params.get('mini_batch_size', 500)
        self._max_runtime_in_seconds = self._params.get('max_runtime_in_seconds', 86400)
Ejemplo n.º 10
0
    def download(s3_uri, local_path, kms_key=None, session=None):
        """Static method that downloads a given S3 uri to the local machine.

        Args:
            s3_uri (str): An S3 uri to download from.
            local_path (str): A local path to download the file(s) to.
            kms_key (str): The KMS key to use to decrypt the files.
            session (sagemaker.session.Session): Session object which manages
                interactions with Amazon SageMaker APIs and any other AWS
                services needed. If not specified, one is created using the
                default AWS configuration chain.
        """
        # The v2 SDK renamed this parameter; warn callers still using it.
        if session is not None:
            _session_v2_rename_warning(session)

        sagemaker_session = session if session else Session()
        bucket, key_prefix = parse_s3_url(url=s3_uri)
        extra_args = {"SSECustomerKey": kms_key} if kms_key is not None else None

        sagemaker_session.download_data(
            path=local_path,
            bucket=bucket,
            key_prefix=key_prefix,
            extra_args=extra_args,
        )
    def from_string(
        cls, constraints_file_string, kms_key=None, file_name=None, sagemaker_session=None
    ):
        """Generates a Constraints object from a constraints JSON string.

        The string is first uploaded to a freshly generated location under
        the session's default bucket, then a ``Constraints`` object is built
        from that S3 uri.

        Args:
            constraints_file_string (str): The contents of the constraints
                JSON file.
            kms_key (str): The kms key to be used to encrypt the file in S3.
            file_name (str): The file name to use when uploading to S3.
            sagemaker_session (sagemaker.session.Session): A SageMaker
                Session object, used for SageMaker interactions (default:
                None). If not specified, one is created using the default AWS
                configuration chain.

        Returns:
            sagemaker.model_monitor.Constraints: The instance of Constraints
                generated from the given string.
        """
        session = sagemaker_session or Session()
        upload_name = file_name if file_name else "constraints.json"
        # A random UUID component keeps concurrent uploads from colliding.
        target_uri = s3.s3_path_join(
            "s3://",
            session.default_bucket(),
            "monitoring",
            str(uuid.uuid4()),
            upload_name,
        )
        uploaded_uri = s3.S3Uploader.upload_string_as_file_body(
            body=constraints_file_string,
            desired_s3_uri=target_uri,
            kms_key=kms_key,
            sagemaker_session=session,
        )

        return Constraints.from_s3_uri(
            constraints_file_s3_uri=uploaded_uri, kms_key=kms_key, sagemaker_session=session
        )
Ejemplo n.º 12
0
 def __init__(self, model_data, role, sagemaker_session=None):
     """Create a ``LinearLearnerModel`` using the regional registry image."""
     session = sagemaker_session or Session()
     # Image URI is "<regional registry>/<repo name>:<repo version>".
     repo = '{}:{}'.format(LinearLearner.repo_name, LinearLearner.repo_version)
     region = session.boto_session.region_name
     image = '{}/{}'.format(registry(region), repo)
     super(LinearLearnerModel, self).__init__(
         model_data, image, role,
         predictor_cls=LinearLearnerPredictor,
         sagemaker_session=session)
Ejemplo n.º 13
0
    def __init__(
            self,
            endpoint_name,
            sagemaker_session=None,
            serializer=IdentitySerializer(),
            deserializer=BytesDeserializer(),
    ):
        """Initialize a ``Predictor``.

        Serialization of input data and deserialization of result data can be
        configured through the initializer arguments. The defaults pass bytes
        through unmodified in both directions: the API sends the request body
        as-is and the prediction result bytes are returned as-is.

        Args:
            endpoint_name (str): Name of the Amazon SageMaker endpoint to
                which requests are sent.
            sagemaker_session (sagemaker.session.Session): A SageMaker
                Session object, used for SageMaker interactions (default:
                None). If not specified, one is created using the default AWS
                configuration chain.
            serializer (:class:`~sagemaker.serializers.BaseSerializer`): A
                serializer object, used to encode data for an inference
                endpoint (default:
                :class:`~sagemaker.serializers.IdentitySerializer`).
            deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`):
                A deserializer object, used to decode data from an inference
                endpoint (default:
                :class:`~sagemaker.deserializers.BytesDeserializer`).
        """
        self.endpoint_name = endpoint_name
        self.sagemaker_session = sagemaker_session or Session()
        self.deserializer = deserializer
        self.serializer = serializer
        # Resolve and cache the endpoint-config and model names up front.
        self._endpoint_config_name = self._get_endpoint_config_name()
        self._model_names = self._get_model_names()
    def __init__(
        self,
        predictor,
        name=None,
    ):
        """Initialize an ``AsyncPredictor``.

        Args:
            predictor (sagemaker.predictor.Predictor): General ``Predictor``
                object has useful methods and variables. ``AsyncPredictor``
                stands on top of it with capability for async inference.
            name (str): Optional name for this predictor (default: None).
        """
        self.predictor = predictor
        self.endpoint_name = predictor.endpoint_name
        self.sagemaker_session = predictor.sagemaker_session or Session()

        # Reuse the session's S3 client when it already has one; otherwise
        # build a client in the session's region.
        session = self.sagemaker_session
        if session.s3_client is None:
            self.s3_client = session.boto_session.client(
                "s3",
                region_name=session.boto_region_name,
            )
        else:
            self.s3_client = session.s3_client

        self.serializer = predictor.serializer
        self.deserializer = predictor.deserializer
        self.name = name
        # Lazily-populated state for async inference bookkeeping.
        self._endpoint_config_name = None
        self._model_names = None
        self._context = None
        self._input_path = None
    def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
        """Initialization for LinearLearnerModel.

        Args:
            model_data (str): The S3 location of a SageMaker model data
                ``.tar.gz`` file.
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, one is created
                using the default AWS configuration chain.
            **kwargs: Keyword arguments passed to the ``FrameworkModel``
                initializer.
        """
        session = sagemaker_session or Session()
        # Resolve the algorithm image for this session's region.
        image_uri = image_uris.retrieve(
            LinearLearner.repo_name,
            session.boto_region_name,
            version=LinearLearner.repo_version,
        )
        super(LinearLearnerModel, self).__init__(
            image_uri,
            model_data,
            role,
            predictor_cls=LinearLearnerPredictor,
            sagemaker_session=session,
            **kwargs)
    def __init__(self, engine):
        """Set up the integration-test predictor with empty job state."""
        super(SageMakerKMeansBatchPredictorIT, self).__init__(engine)

        # Job/resource identifiers are filled in as the test progresses.
        for attr in ('_dataset_s3_url', '_bucket_name',
                     '_local_model_filepath', '_model_s3_filepath',
                     '_results_s3_location', '_model_name', '_job_name',
                     '_instance_type', '_instance_count'):
            setattr(self, attr, None)

        self._sagemaker_session = Session()
        self._sagemaker_client = boto3.client('sagemaker')
        self._aws_helper = AwsHelper(self._logger)
        self._job_monitor = None
    def attach(cls, transform_job_name, sagemaker_session=None):
        """Attach an existing transform job to a new Transformer instance.

        Args:
            transform_job_name (str): Name for the transform job to be
                attached.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, one will be
                created using the default AWS configuration chain.

        Returns:
            sagemaker.transformer.Transformer: The Transformer instance with
            the specified transform job attached.
        """
        session = sagemaker_session or Session()

        description = session.sagemaker_client.describe_transform_job(
            TransformJobName=transform_job_name
        )
        init_params = cls._prepare_init_params_from_job_description(description)
        transformer = cls(sagemaker_session=session, **init_params)
        transformer.latest_transform_job = _TransformJob(
            sagemaker_session=session,
            job_name=init_params["base_transform_job_name"],
        )

        return transformer
Ejemplo n.º 18
0
    def attach(cls, training_job_name, sagemaker_session=None, **kwargs):
        """Attach to an existing training job.

        Create an Estimator bound to an existing training job. After attaching, if
        the training job has a Complete status, it can be ``deploy()`` ed to create
        a SageMaker Endpoint and return a ``Predictor``.

        If the training job is in progress, attach will block and display log messages
        from the training job, until the training job completes.

        Args:
            training_job_name (str): The name of the training job to attach to.
                May be None when called from an inheriting class that declares
                ``__framework_name__``; then ``kwargs`` must supply the
                constructor parameters including ``hyperparameters``.
            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
                Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one
                using the default AWS configuration chain.
            **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Estimator` constructor.

        Returns:
            sagemaker.estimator.Framework: ``Estimator`` with the attached training job.

        Raises:
            ValueError: If ``training_job_name`` is None and the class does
                not declare ``__framework_name__``.
        """
        sagemaker_session = sagemaker_session or Session()

        if training_job_name is not None:
            # Recover constructor params and hyperparameters from the
            # training job description.
            job_details = sagemaker_session.sagemaker_client.describe_training_job(
                TrainingJobName=training_job_name)
            init_params, hp, _ = cls._prepare_estimator_params_from_job_description(
                job_details)

        else:
            # this case is only valid when called from inheriting class and then the class must declare framework
            if not hasattr(cls, '__framework_name__'):
                raise ValueError('must specify training_job name')
            init_params = dict(kwargs)
            hp = init_params.pop('hyperparameters')

        # parameters for framework classes
        # These framework-level settings are stored JSON-encoded in the
        # hyperparameters; decode each back into its Python value.
        framework_init_params = dict()
        framework_init_params['entry_point'] = json.loads(
            hp.get(SCRIPT_PARAM_NAME))
        framework_init_params['source_dir'] = json.loads(
            hp.get(DIR_PARAM_NAME))
        framework_init_params['enable_cloudwatch_metrics'] = json.loads(
            hp.get(CLOUDWATCH_METRICS_PARAM_NAME))
        framework_init_params['container_log_level'] = json.loads(
            hp.get(CONTAINER_LOG_LEVEL_PARAM_NAME))

        # drop json and remove other SageMaker specific additions
        hyperparameters = {entry: json.loads(hp[entry]) for entry in hp}
        framework_init_params['hyperparameters'] = hyperparameters

        init_params.update(framework_init_params)

        estimator = cls(sagemaker_session=sagemaker_session, **init_params)
        # NOTE(review): the attached job is looked up by 'base_job_name' from
        # the recovered init params -- confirm this matches the actual job
        # name for jobs with generated names.
        estimator.latest_training_job = _TrainingJob(
            sagemaker_session=sagemaker_session,
            training_job_name=init_params['base_job_name'])
        # Blocks (streaming logs) until the training job finishes.
        estimator.latest_training_job.wait()
        estimator.uploaded_code = UploadedCode(estimator.source_dir,
                                               estimator.entry_point)
        return estimator
Ejemplo n.º 19
0
    def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"):
        """Attach to an existing training job.

        Create an Estimator bound to an existing training job. Each subclass
        is responsible for implementing
        ``_prepare_init_params_from_job_description()``, as this method
        delegates the actual conversion of a training job description to the
        arguments that the class constructor expects. After attaching, if the
        training job has a Complete status, it can be ``deploy()`` ed to
        create a SageMaker Endpoint and return a ``Predictor``.

        If the training job is in progress, attach will block and display log
        messages from the training job, until the training job completes.

        Examples:
            >>> my_estimator.fit(wait=False)
            >>> training_job_name = my_estimator.latest_training_job.name
            Later on:
            >>> attached_estimator = Estimator.attach(training_job_name)
            >>> attached_estimator.deploy()

        Args:
            training_job_name (str): The name of the training job to attach
                to.
            sagemaker_session (sagemaker.session.Session): Session object
                which manages interactions with Amazon SageMaker APIs and any
                other AWS services needed. If not specified, the estimator
                creates one using the default AWS configuration chain.
            model_channel_name (str): Name of the channel where pre-trained
                model data will be downloaded (default: 'model'). If no
                channel with the same name exists in the training job, this
                option will be ignored.

        Returns:
            Instance of the calling ``Estimator`` Class with the attached
            training job.
        """
        session = sagemaker_session or Session()

        job_details = session.sagemaker_client.describe_training_job(
            TrainingJobName=training_job_name
        )
        init_params = cls._prepare_init_params_from_job_description(
            job_details, model_channel_name)
        # Carry the job's tags over to the attached estimator.
        tags = session.sagemaker_client.list_tags(
            ResourceArn=job_details["TrainingJobArn"]
        )["Tags"]
        init_params.update(tags=tags)

        estimator = cls(sagemaker_session=session, **init_params)
        estimator.latest_training_job = _TrainingJob(
            sagemaker_session=session, job_name=training_job_name
        )
        estimator._current_job_name = estimator.latest_training_job.name
        # Blocks (streaming logs) until the training job finishes.
        estimator.latest_training_job.wait()

        # pylint gets confused thinking that estimator is an EstimatorBase instance, but it actually
        # is a Framework or any of its derived classes. We can safely ignore the no-member errors.
        estimator.uploaded_code = UploadedCode(
            estimator.source_dir, estimator.entry_point  # pylint: disable=no-member
        )
        return estimator
Ejemplo n.º 20
0
    def __init__(self, model_data, image, role, predictor_cls=None,
                 env=None, name=None, sagemaker_session=None):
        """Create a SageMaker ``Model``.

        Args:
            model_data (str): S3 location of the model artifact ``.tar.gz`` file.
            image (str): Docker image URI used to serve the model.
            role (str): AWS IAM role (name or full ARN) that SageMaker jobs and
                APIs assume to access training data and model artifacts; the
                inference code may also use it to reach other AWS resources.
            predictor_cls (callable[string, sagemaker.session.Session]): Factory
                invoked on the created endpoint name by ``deploy`` (default:
                None). When None, ``deploy`` returns nothing.
            env (dict[str, str]): Environment variables set on ``image`` when it
                is hosted in SageMaker (default: None).
            name (str): Model name; when None a fresh default name is chosen on
                each ``deploy``.
            sagemaker_session (sagemaker.session.Session): Session for SageMaker
                interactions (default: None). A session built from the default
                AWS configuration chain is created when omitted.
        """
        self.model_data = model_data
        self.image = image
        self.role = role
        self.predictor_cls = predictor_cls
        # Normalize a missing environment mapping to an empty dict.
        self.env = env or {}
        self.name = name
        # Fall back to a default-configured session when none was supplied.
        self.sagemaker_session = sagemaker_session if sagemaker_session else Session()
        self._model_name = None
Ejemplo n.º 21
0
    def upload_string_as_file_body(body, desired_s3_uri=None, kms_key=None, session=None):
        """Static method that uploads a string as the body of an S3 object.

        Args:
            body (str): Contents to store at the target S3 location.
            desired_s3_uri (str): Destination S3 URI (``s3://bucket/key``).
            kms_key (str): KMS key used to encrypt the object, if any.
            session (sagemaker.session.Session): AWS session to use. A default
                session is created automatically when not provided.

        Returns:
            str: The S3 URI the body was uploaded to.

        """
        # An explicitly supplied session triggers the v2 rename deprecation notice.
        if session is not None:
            _session_v2_rename_warning(session)

        active_session = session or Session()
        bucket, key = parse_s3_url(desired_s3_uri)

        active_session.upload_string_as_file_body(
            body=body, bucket=bucket, key=key, kms_key=kms_key
        )
        return desired_s3_uri
Ejemplo n.º 22
0
    def deploy(self, initial_instance_count, instance_type,
               endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Creates a SageMaker ``Model`` and ``EndpointConfig``, then deploys an
        ``Endpoint`` from this ``Model``. When ``self.predictor_cls`` is set,
        the result of invoking it on the created endpoint name is returned.

        After this call returns, the created model's name is available in
        ``self.name`` and the endpoint's name in ``self.endpoint_name``.

        Args:
            initial_instance_count (int): Number of instances initially run in
                the created ``Endpoint``.
            instance_type (str): EC2 instance type to deploy to, e.g.
                'ml.p2.xlarge'.
            endpoint_name (str): Endpoint name to create (default: None). A
                unique name is generated when omitted.
            tags: Tags to attach to the endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Result of
                invoking ``self.predictor_cls`` on the created endpoint name
                when it is set; otherwise None.
        """
        # Lazily choose a session: 'local'/'local_gpu' run against local mode
        # instead of the SageMaker service.
        if not self.sagemaker_session:
            wants_local = instance_type in ('local', 'local_gpu')
            self.sagemaker_session = LocalSession() if wants_local else Session()

        container_def = self.prepare_container_def(instance_type)
        if not self.name:
            self.name = name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(
            self.name, self.role, container_def, vpc_config=self.vpc_config)

        variant = sagemaker.production_variant(
            self.name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name if endpoint_name else self.name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [variant], tags)

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
Ejemplo n.º 23
0
    def attach(cls, auto_ml_job_name, sagemaker_session=None):
        """Attach to an existing AutoML job.

        Builds and returns an ``AutoML`` instance bound to the already-running
        (or finished) job named *auto_ml_job_name*.

        Args:
            auto_ml_job_name (str): Name of the AutoML job to attach to.
            sagemaker_session (sagemaker.session.Session): SageMaker Session
                used for API calls (default: None). A default session is
                created when omitted.

        Returns:
            sagemaker.automl.AutoML: An ``AutoML`` instance with the attached
            AutoML job.

        """
        sagemaker_session = sagemaker_session or Session()

        job_desc = sagemaker_session.describe_auto_ml_job(auto_ml_job_name)
        job_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=job_desc["AutoMLJobArn"]
        )["Tags"]

        # Hoist the nested config sections once instead of re-walking the
        # description dict for every keyword argument below.
        job_config = job_desc.get("AutoMLJobConfig", {})
        security_config = job_config.get("SecurityConfig", {})
        completion = job_config.get("CompletionCriteria", {})
        first_input = job_desc["InputDataConfig"][0]

        amlj = AutoML(
            role=job_desc["RoleArn"],
            target_attribute_name=first_input["TargetAttributeName"],
            output_kms_key=job_desc["OutputDataConfig"].get("KmsKeyId"),
            output_path=job_desc["OutputDataConfig"]["S3OutputPath"],
            base_job_name=auto_ml_job_name,
            compression_type=first_input.get("CompressionType"),
            sagemaker_session=sagemaker_session,
            volume_kms_key=security_config.get("VolumeKmsKeyId"),
            encrypt_inter_container_traffic=security_config.get(
                "EnableInterContainerTrafficEncryption", False),
            vpc_config=security_config.get("VpcConfig"),
            problem_type=job_desc.get("ProblemType"),
            max_candidates=completion.get("MaxCandidates"),
            max_runtime_per_training_job_in_seconds=completion.get(
                "MaxRuntimePerTrainingJobInSeconds"),
            total_job_runtime_in_seconds=completion.get(
                "MaxAutoMLJobRuntimeInSeconds"),
            job_objective=job_desc.get("AutoMLJobObjective", {}).get("MetricName"),
            generate_candidate_definitions_only=job_desc.get(
                "GenerateCandidateDefinitionsOnly", False),
            tags=job_tags,
        )
        amlj.current_job_name = auto_ml_job_name
        amlj.latest_auto_ml_job = auto_ml_job_name  # pylint: disable=W0201
        amlj._auto_ml_job_desc = job_desc
        return amlj
Ejemplo n.º 24
0
    def attach(cls, tuning_job_name, sagemaker_session=None,
               job_details=None, estimator_cls=None):
        """Attach to an existing hyperparameter tuning job.

        Returns a ``HyperparameterTuner`` bound to *tuning_job_name*. Once
        attached, the best training job (or any other completed one) can be
        deployed to an Amazon SageMaker Endpoint to get a ``Predictor``.

        Examples:
            >>> my_tuner.fit()
            >>> job_name = my_tuner.latest_tuning_job.name
            Later on:
            >>> attached_tuner = HyperparameterTuner.attach(job_name)
            >>> attached_tuner.deploy()

        Args:
            tuning_job_name (str): Name of the hyperparameter tuning job to
                attach to.
            sagemaker_session (sagemaker.session.Session): Session used to
                talk to SageMaker and other AWS services. A session built
                from the default AWS configuration chain is created when
                omitted.
            job_details (dict): A cached ``DescribeHyperParameterTuningJob``
                response. When omitted, the describe call is performed here
                with the given tuning job name.
            estimator_cls (str): Estimator class name backing the training
                jobs, e.g. 'sagemaker.estimator.Estimator'. When omitted the
                class is derived from training-job metadata, falling back to
                :class:~`sagemaker.estimator.Estimator`.

        Returns:
            sagemaker.tuner.HyperparameterTuner: A ``HyperparameterTuner``
            instance with the attached hyperparameter tuning job.
        """
        sagemaker_session = sagemaker_session or Session()

        if job_details is None:
            client = sagemaker_session.sagemaker_client
            job_details = client.describe_hyper_parameter_tuning_job(
                HyperParameterTuningJobName=tuning_job_name)

        training_def = job_details["TrainingJobDefinition"]
        estimator_cls = cls._prepare_estimator_cls(estimator_cls, training_def)
        estimator = cls._prepare_estimator_from_job_description(
            estimator_cls, job_details, sagemaker_session)

        tuner = cls(estimator=estimator,
                    **cls._prepare_init_params_from_job_description(job_details))
        tuner.latest_tuning_job = _TuningJob(
            sagemaker_session=sagemaker_session, job_name=tuning_job_name)
        return tuner
Ejemplo n.º 25
0
    def _create_sagemaker_pipeline_model(self, instance_type):
        """Create the SageMaker Model entity backing this pipeline.

        Args:
            instance_type (str): EC2 instance type this Model will be used
                for; only consulted to decide whether the serving image needs
                GPU support.
        """
        # Lazily create a default session if the caller never supplied one.
        if not self.sagemaker_session:
            self.sagemaker_session = Session()

        containers = self.pipeline_container_def(instance_type)

        # Derive a default model name from the first container image.
        if not self.name:
            self.name = name_from_image(containers[0]['Image'])
        self.sagemaker_session.create_model(
            self.name, self.role, containers, vpc_config=self.vpc_config)
def main():
    """Cache the MNIST dataset locally as .npz archives and upload it to S3."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data/')
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    if not data_dir.exists():
        # Download once; subsequent runs reuse the cached archives.
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        data_dir.mkdir()
        for split, images, labels in (('train', x_train, y_train),
                                      ('test', x_test, y_test)):
            np.savez(str(data_dir / split), image=images, label=labels)

    session = Session()
    bucket = os.getenv('S3_BUCKET_NAME', session.default_bucket())
    session.upload_data(path=str(data_dir), bucket=bucket,
                        key_prefix='dataset/mnist')
Ejemplo n.º 27
0
def main():
    """Launch an MNIST TensorFlow training job on SageMaker from the CLI."""
    session = Session()
    bucket = os.getenv('S3_BUCKET_NAME', session.default_bucket())

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--role', default=os.environ['SAGEMAKER_ROLE'])
    parser.add_argument('--input_data',
                        default=f's3://{bucket}/dataset/mnist')
    parser.add_argument('--output_path',
                        default=f's3://{bucket}/training')
    parser.add_argument('--train_instance_type', default='ml.m5.large')
    parser.add_argument('--wait', action='store_true')
    args = parser.parse_args()

    # Timestamped job name keeps repeated runs distinguishable in the console.
    job_name = 'mnist-' + dt.now().strftime('%Y-%m-%d-%H-%M')

    metric_definitions = [
        {'Name': 'train loss', 'Regex': r'loss: (\S+)'},
        {'Name': 'valid loss', 'Regex': r'val_loss: (\S+)'},
    ]
    estimator = TensorFlow(entry_point='train.py',
                           source_dir='src',
                           role=args.role,
                           train_instance_count=1,
                           train_instance_type=args.train_instance_type,
                           train_volume_size=30,
                           train_max_run=86400,
                           output_path=args.output_path,
                           code_location=args.output_path,
                           py_version='py3',
                           framework_version='1.12.0',
                           hyperparameters={'batch_size': args.batch_size,
                                            'epochs': args.epochs},
                           metric_definitions=metric_definitions)
    estimator.fit({'dataset': args.input_data}, wait=args.wait,
                  job_name=job_name)
Ejemplo n.º 28
0
    def _create_sagemaker_pipeline_model(self, instance_type):
        """Register this pipeline as a single SageMaker Model entity.

        Args:
            instance_type (str): EC2 instance type the model targets; used
                only to decide whether a GPU-enabled serving image is needed.
        """
        # Keep an existing session; otherwise build a default-configured one.
        self.sagemaker_session = self.sagemaker_session or Session()

        containers = self.pipeline_container_def(instance_type)

        # Default the model name from the first container's image if unset.
        self.name = self.name or name_from_image(containers[0]["Image"])
        self.sagemaker_session.create_model(self.name,
                                            self.role,
                                            containers,
                                            vpc_config=self.vpc_config)
Ejemplo n.º 29
0
 def __init__(self, model_data, role, sagemaker_session=None):
     """Create a model object serving k-means artifacts with the algorithm's
     default image and ``KMeansPredictor``."""
     sagemaker_session = sagemaker_session or Session()
     # Resolve the region-specific algorithm image for k-means.
     region = sagemaker_session.boto_session.region_name
     image = "{}/{}".format(registry(region), KMeans.repo)
     super(KMeansModel, self).__init__(
         model_data, image, role,
         predictor_cls=KMeansPredictor,
         sagemaker_session=sagemaker_session)
Ejemplo n.º 30
0
def save_to_feature_store():
    """Ingest the CSV at ``feature_s3_url`` into a SageMaker Feature Store
    feature group, creating the group first when it does not already exist.

    Reads module-level globals (``feature_s3_url``, ``boto_session``,
    ``sagemaker_client``, ``featurestore_runtime``, ``default_bucket``,
    ``prefix``) and rebinds the global ``feature_group``.
    """
    logger.info("Save to FeatureStore started")
    global feature_group

    df_data = pd.read_csv(feature_s3_url)
    logger.info("Read data from S3: %s", df_data.head())

    feature_store_session = Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_featurestore_runtime_client=featurestore_runtime)
    # You can modify the following to use a bucket of your choosing
    logger.info("Default bucket: %s", default_bucket)

    # Record identifier and event time feature names.
    record_identifier_feature_name = "IDpol"
    event_time_feature_name = "EventTime"
    current_time_sec = int(round(time.time()))
    # Cast object dtype to string so the FeatureStore SDK maps it to the
    # String feature type.
    cast_object_to_string(df_data)
    df_data[event_time_feature_name] = pd.Series([current_time_sec] *
                                                 len(df_data),
                                                 dtype="float64")

    feature_group_name = 'insurance-policy-feature-group-' + strftime(
        '%d-%H-%M-%S', gmtime())
    logger.info("Feature Group Name: %s", feature_group_name)

    # BUG FIX: the previous version logged the global ``feature_group`` before
    # it was assigned (NameError risk / stale value) and constructed the
    # FeatureGroup object in both branches; bind it once and log the name.
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=feature_store_session)

    # Create the feature group only if it does not exist yet.
    if not feature_group_exist(feature_group_name):
        logger.info("Feature Group: %s doesn't exist. Create a new one.",
                    feature_group_name)

        # Load feature definitions; the SDK auto-detects the schema from the
        # input dataframe (output is suppressed).
        feature_group.load_feature_definitions(data_frame=df_data)
        feature_group.create(
            s3_uri=f"s3://{default_bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=get_execution_role(),
            enable_online_store=True)

        wait_for_feature_group_creation_complete(feature_group=feature_group)
        feature_group.describe()
    else:
        logger.info("Feature Group: %s exists", feature_group_name)

    # Ingest data into the feature store.
    feature_group.ingest(data_frame=df_data, max_workers=5, wait=True)
Ejemplo n.º 31
0
    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Creates a SageMaker ``Model`` and ``EndpointConfig``, then deploys an
        ``Endpoint`` from this ``Model``. When ``self.predictor_cls`` is set,
        the result of invoking it on the created endpoint name is returned.

        The created endpoint's name is available in ``self.endpoint_name``
        after this call returns.

        Args:
            initial_instance_count (int): Number of instances initially run in
                the created ``Endpoint``.
            instance_type (str): EC2 instance type to deploy to, e.g. 'ml.p2.xlarge'.
            endpoint_name (str): Endpoint name to create (default: None). A
                unique name is generated when omitted.
            tags (list[dict[str, str]]): Key-value pairs used to tag the
                endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Result of
                invoking ``self.predictor_cls`` on the created endpoint name
                when it is set; otherwise None.
        """
        # 'local'/'local_gpu' instance types run against local mode rather
        # than the SageMaker service.
        if not self.sagemaker_session:
            is_local = instance_type in ('local', 'local_gpu')
            self.sagemaker_session = LocalSession() if is_local else Session()

        container_def = self.prepare_container_def(instance_type)
        model_name = self.name if self.name else name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(model_name, self.role, container_def)

        variant = sagemaker.production_variant(
            model_name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or model_name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [variant], tags)

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
Ejemplo n.º 32
0
class Model(object):
    """A SageMaker ``Model`` that can be deployed to an ``Endpoint``."""

    def __init__(self, model_data, image, role, predictor_cls=None, env=None,
                 name=None, sagemaker_session=None):
        """Create a SageMaker ``Model``.

        Args:
            model_data (str): S3 location of the model artifact ``.tar.gz`` file.
            image (str): Docker image URI used to serve the model.
            role (str): AWS IAM role (name or full ARN) that SageMaker jobs and
                APIs assume to access training data and model artifacts; the
                inference code may also use it to reach other AWS resources.
            predictor_cls (callable[string, sagemaker.session.Session]): Factory
                invoked on the created endpoint name by ``deploy`` (default:
                None). When None, ``deploy`` returns nothing.
            env (dict[str, str]): Environment variables set on ``image`` when it
                is hosted in SageMaker (default: None).
            name (str): Model name; when None a fresh default name is chosen on
                each ``deploy``.
            sagemaker_session (sagemaker.session.Session): Session for SageMaker
                interactions (default: None). A default one is created lazily
                at ``deploy`` time when omitted.
        """
        self.model_data = model_data
        self.image = image
        self.role = role
        self.predictor_cls = predictor_cls
        # Normalize a missing environment mapping to an empty dict.
        self.env = env or {}
        self.name = name
        self.sagemaker_session = sagemaker_session
        self._model_name = None

    def prepare_container_def(self, instance_type):
        """Build the container definition used by the CreateModel API.

        Subclasses may override this to customize the container for a given
        instance type. Called by ``deploy()``.

        Args:
            instance_type (str): EC2 instance type to deploy to, e.g.
                'ml.p2.xlarge'.

        Returns:
            dict: A container definition produced by ``sagemaker.container_def()``.
        """
        return sagemaker.container_def(self.image, self.model_data, self.env)

    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Creates a SageMaker ``Model`` and ``EndpointConfig``, then deploys an
        ``Endpoint`` from this ``Model``. When ``self.predictor_cls`` is set,
        the result of invoking it on the created endpoint name is returned.

        The created endpoint's name is available in ``self.endpoint_name``
        after this call returns.

        Args:
            initial_instance_count (int): Number of instances initially run in
                the created ``Endpoint``.
            instance_type (str): EC2 instance type to deploy to, e.g.
                'ml.p2.xlarge'.
            endpoint_name (str): Endpoint name to create (default: None). A
                unique name is generated when omitted.
            tags (list[dict[str, str]]): Key-value pairs used to tag the
                endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Result of
                invoking ``self.predictor_cls`` on the created endpoint name
                when it is set; otherwise None.
        """
        # 'local'/'local_gpu' instance types run against local mode rather
        # than the SageMaker service.
        if not self.sagemaker_session:
            run_locally = instance_type in ('local', 'local_gpu')
            self.sagemaker_session = LocalSession() if run_locally else Session()

        container_def = self.prepare_container_def(instance_type)
        model_name = self.name if self.name else name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(model_name, self.role, container_def)

        variant = sagemaker.production_variant(
            model_name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or model_name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [variant], tags)

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)