Example #1
def test_upload(sagemaker_session):
    desired_s3_uri = os.path.join("s3://", BUCKET_NAME, CURRENT_JOB_NAME,
                                  SOURCE_NAME)
    S3Uploader.upload(local_path="/path/to/app.jar",
                      desired_s3_uri=desired_s3_uri,
                      session=sagemaker_session)
    sagemaker_session.upload_data.assert_called_with(
        path="/path/to/app.jar",
        bucket=BUCKET_NAME,
        key_prefix=os.path.join(CURRENT_JOB_NAME, SOURCE_NAME),
        extra_args=None,
    )
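The fixture and module-level constants this test relies on are not shown. A minimal sketch of what they might look like, assuming pytest with unittest.mock (all names below are placeholders, not the SDK's actual test fixtures):

import os
from unittest.mock import MagicMock

import pytest

BUCKET_NAME = "some-bucket"
CURRENT_JOB_NAME = "current-job"
SOURCE_NAME = "source"


@pytest.fixture()
def sagemaker_session():
    # A mocked Session so upload_data can be asserted on without touching AWS.
    session = MagicMock()
    session.upload_data.return_value = "s3://{}/{}".format(
        BUCKET_NAME, os.path.join(CURRENT_JOB_NAME, SOURCE_NAME))
    return session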
Example #2
def upload_to_s3(local_path, s3_data_location, *, search=None):
    import os
    from tqdm import tqdm
    from sagemaker.s3 import S3Uploader as s3up

    for root, dirs, files in os.walk(local_path):
        if len(files) > 0:
            # Mirror the folder structure relative to local_path under the S3 prefix.
            rel_dir = os.path.relpath(root, local_path).replace(os.sep, "/")
            for name in tqdm(files, desc=f"Uploading folder '{root}'"):
                file_path = os.path.join(root, name)
                s3_path = (s3_data_location if rel_dir == "."
                           else f"{s3_data_location}/{rel_dir}")
                s3up.upload(file_path, s3_path)
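A hedged usage sketch for the helper above; the local directory and bucket are placeholders:

# Mirrors /home/user/dataset/** under s3://my-bucket/experiments/dataset-v1/**, e.g.
# /home/user/dataset/train/a.csv -> s3://my-bucket/experiments/dataset-v1/train/a.csv
upload_to_s3("/home/user/dataset", "s3://my-bucket/experiments/dataset-v1")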
Example #3
def test_upload(sagemaker_session, caplog):
    desired_s3_uri = os.path.join("s3://", BUCKET_NAME, CURRENT_JOB_NAME,
                                  SOURCE_NAME)
    S3Uploader.upload(local_path="/path/to/app.jar",
                      desired_s3_uri=desired_s3_uri,
                      session=sagemaker_session)
    sagemaker_session.upload_data.assert_called_with(
        path="/path/to/app.jar",
        bucket=BUCKET_NAME,
        key_prefix=os.path.join(CURRENT_JOB_NAME, SOURCE_NAME),
        extra_args=None,
    )
    warning_message = (
        "Parameter 'session' will be renamed to 'sagemaker_session' "
        "in SageMaker Python SDK v2.")
    assert warning_message in caplog.text
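For comparison, the same call with the renamed parameter that the warning refers to (SageMaker Python SDK v2 style), as later examples on this page use:

S3Uploader.upload(local_path="/path/to/app.jar",
                  desired_s3_uri=desired_s3_uri,
                  sagemaker_session=sagemaker_session)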
Example #4
def sagemaker_processing_handle(args, config, main):
    if args.sagemaker_run:
        # Remote processing
        sagemaker_processing_run(args=args, config=config)
    else:
        # Local processing
        args, tmps, uris = sagemaker_processing_local_args(args=args,
                                                           config=config)
        try:
            main(args)
            if tmps:
                session = sagemaker_session(
                    profile_name=args.sagemaker_profile)
                for k in tmps.keys():
                    S3Uploader.upload(local_path=getattr(args, k),
                                      desired_s3_uri=uris[k],
                                      sagemaker_session=session)
        finally:
            # Close the temporary-directory context managers explicitly;
            # __exit__ takes the usual (exc_type, exc_value, traceback) triple.
            for tmp in tmps.values():
                tmp.__exit__(None, None, None)
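A minimal illustration, independent of the library above, of why a manually managed context manager such as tempfile.TemporaryDirectory needs the full (exc_type, exc_value, traceback) triple when it is closed by hand:

import tempfile

tmp = tempfile.TemporaryDirectory()
workdir = tmp.__enter__()
try:
    pass  # ... write files under workdir ...
finally:
    tmp.__exit__(None, None, None)  # equivalent to a clean `with` exit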
Example #5
def upload(src, dst, gz, session: sagemaker.Session, root='.'):
    dst = cli_argument(dst, session=session)
    if not os.path.exists(src):
        raise click.UsageError("Source must exist")
    if not dst.startswith('s3://'):
        if dst.startswith('/'):
            dst = dst[1:]
        bucket = session.default_bucket()
        dst = 's3://{}/{}'.format(bucket, dst)
    url = urlparse(dst)
    assert url.scheme == 's3'
    bucket = url.netloc
    key = url.path
    if key.startswith('/'):
        key = key[1:]
    if os.path.isfile(src):
        if gz:
            raise click.UsageError(
                "Option gz is only valid for source directories")
        s3 = session.boto_session.client('s3')
        s3.upload_file(src, bucket, key)
    elif os.path.isdir(src):
        if gz:
            if not re.match(".*\\.(tar\\.gz|tgz)$", dst, re.IGNORECASE):
                raise click.UsageError(
                    "Destination should end in .tar.gz or .tgz")
            s3_dst = os.path.dirname(dst)
            file_name = os.path.basename(dst)
            with _tmpdir() as tmp:
                p = os.path.join(tmp, file_name)
                with tarfile.open(p, 'w:gz') as arc:
                    arc.add(name=src, arcname=root, recursive=True)
                s3 = session.boto_session.client('s3')
                s3.upload_file(p, bucket, key)
        else:
            S3Uploader.upload(local_path=src,
                              desired_s3_uri=dst,
                              sagemaker_session=session)
    else:
        raise click.UsageError("Source must be file or directory")
Example #6
def upload_local_channel(channel, session, s3_uri):
    url = urlparse(channel)
    if url.scheme == 's3':
        return channel
    elif url.scheme == 'file':
        path = url2pathname(url.path)
        S3Uploader.upload(
            local_path=path,
            desired_s3_uri=s3_uri,
            sagemaker_session=session
        )
        if os.path.isfile(path):
            #todo: urljoin
            s3_uri = "{}/{}".format(s3_uri, os.path.basename(path))
        print("Uploaded [{}] ([{}]) to [{}]".format(
            channel, path, s3_uri
        ))
        return s3_uri
    else:
        print("Type {}".format(type(s3_uri)))
        raise ValueError(
            "Unknown scheme: [{}] (uri: {})".format(url.scheme, channel))
Example #7
def dataset(sagemaker_session):
    dataset_local_path = os.path.join(
        DATA_DIR, "pipeline/clarify_check_step/dataset.csv")
    dataset_s3_uri = "s3://{}/{}/{}/{}/{}".format(
        sagemaker_session.default_bucket(),
        "clarify_check_step",
        "input",
        "dataset",
        utils.unique_name_from_base("dataset"),
    )
    return S3Uploader.upload(dataset_local_path,
                             dataset_s3_uri,
                             sagemaker_session=sagemaker_session)
Example #8
    def _inject_repack_script(self):
        """Injects the _repack_model.py script where it belongs.

        If the source_dir is an S3 path:
            1) downloads the source_dir tar.gz
            2) copies the _repack_model.py script where it belongs
            3) uploads the mutated source_dir

        If the source_dir is a local path:
            1) copies the _repack_model.py script into the source dir
        """
        fname = os.path.join(os.path.dirname(__file__), REPACK_SCRIPT)
        if self._source_dir.lower().startswith("s3://"):
            with tempfile.TemporaryDirectory() as tmp:
                local_path = os.path.join(tmp, "local.tar.gz")

                S3Downloader.download(
                    s3_uri=self._source_dir,
                    local_path=local_path,
                    sagemaker_session=self._estimator.sagemaker_session,
                )

                src_dir = os.path.join(tmp, "src")
                with tarfile.open(name=local_path, mode="r:gz") as tf:
                    tf.extractall(path=src_dir)

                shutil.copy2(fname, os.path.join(src_dir, REPACK_SCRIPT))
                with tarfile.open(name=local_path, mode="w:gz") as tf:
                    tf.add(src_dir, arcname=".")

                S3Uploader.upload(
                    local_path=local_path,
                    desired_s3_uri=self._source_dir,
                    sagemaker_session=self._estimator.sagemaker_session,
                )
        else:
            shutil.copy2(fname, os.path.join(self._source_dir, REPACK_SCRIPT))
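A hedged sketch of how the round trip above could be checked locally; the file name is a placeholder and not part of the SDK:

import tarfile

# After re-packing with tf.add(src_dir, arcname="."), the injected script
# appears as a "./"-prefixed member of the archive.
with tarfile.open("local.tar.gz", mode="r:gz") as tf:
    assert "./_repack_model.py" in tf.getnames()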
Example #9
    def _normalize_inputs(self, inputs=None):
        """Ensures that all the ``ProcessingInput`` objects have names and S3 URIs.

        Args:
            inputs (list[sagemaker.processing.ProcessingInput]): A list of ``ProcessingInput``
                objects to be normalized (default: None). If not specified,
                an empty list is returned.

        Returns:
            list[sagemaker.processing.ProcessingInput]: The list of normalized
                ``ProcessingInput`` objects.

        Raises:
            TypeError: if the inputs are not ``ProcessingInput`` objects.
        """
        # Initialize a list of normalized ProcessingInput objects.
        normalized_inputs = []
        if inputs is not None:
            # Iterate through the provided list of inputs.
            for count, file_input in enumerate(inputs, 1):
                if not isinstance(file_input, ProcessingInput):
                    raise TypeError(
                        "Your inputs must be provided as ProcessingInput objects."
                    )
                # Generate a name for the ProcessingInput if it doesn't have one.
                if file_input.input_name is None:
                    file_input.input_name = "input-{}".format(count)
                # If the source is a local path, upload it to S3
                # and save the S3 uri in the ProcessingInput source.
                parse_result = urlparse(file_input.source)
                if parse_result.scheme != "s3":
                    desired_s3_uri = os.path.join(
                        "s3://",
                        self.sagemaker_session.default_bucket(),
                        self._current_job_name,
                        "input",
                        file_input.input_name,
                    )
                    s3_uri = S3Uploader.upload(
                        local_path=file_input.source,
                        desired_s3_uri=desired_s3_uri,
                        session=self.sagemaker_session,
                    )
                    file_input.source = s3_uri
                normalized_inputs.append(file_input)
        return normalized_inputs
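A hedged illustration of the behaviour described in the docstring; the bucket, job name, and destinations below are placeholders:

from sagemaker.processing import ProcessingInput

inputs = [
    ProcessingInput(source="s3://my-bucket/features/data.csv",
                    destination="/opt/ml/processing/input/features"),
    ProcessingInput(source="/home/user/local-labels",
                    destination="/opt/ml/processing/input/labels"),
]
# After _normalize_inputs(inputs):
#   inputs[0].input_name == "input-1" and its S3 source is kept as-is;
#   inputs[1].input_name == "input-2" and its source becomes roughly
#   s3://<default-bucket>/<current-job-name>/input/input-2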
Example #10
    def _upload_code(self, code):
        """Uploads a code file or directory specified as a string
        and returns the S3 URI.

        Args:
            code (str): A file or directory to be uploaded to S3.

        Returns:
            str: The S3 URI of the uploaded file or directory.

        """
        desired_s3_uri = "s3://{}/{}/input/{}".format(
            self.sagemaker_session.default_bucket(),
            self._current_job_name,
            self._CODE_CONTAINER_INPUT_NAME,
        )
        return S3Uploader.upload(local_path=code,
                                 desired_s3_uri=desired_s3_uri,
                                 session=self.sagemaker_session)
Example #11
def multi_variant_endpoint(sagemaker_session):
    """
    Sets up the multi variant endpoint before the integration tests run.
    Cleans up the multi variant endpoint after the integration tests run.
    """
    multi_variant_endpoint.endpoint_name = unique_name_from_base(
        "integ-test-multi-variant-endpoint")
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name=multi_variant_endpoint.endpoint_name,
            sagemaker_session=sagemaker_session,
            hours=2,
    ):

        # Creating a model
        bucket = sagemaker_session.default_bucket()
        prefix = "sagemaker/DEMO-VariantTargeting"
        model_url = S3Uploader.upload(
            local_path=XG_BOOST_MODEL_LOCAL_PATH,
            desired_s3_uri="s3://" + bucket + "/" + prefix,
            session=sagemaker_session,
        )

        image_uri = get_image_uri(sagemaker_session.boto_session.region_name,
                                  "xgboost", "0.90-1")

        multi_variant_endpoint_model = sagemaker_session.create_model(
            name=MODEL_NAME,
            role=ROLE,
            container_defs={
                "Image": image_uri,
                "ModelDataUrl": model_url
            },
        )

        # Creating a multi variant endpoint
        variant1 = production_variant(
            model_name=MODEL_NAME,
            instance_type=DEFAULT_INSTANCE_TYPE,
            initial_instance_count=DEFAULT_INSTANCE_COUNT,
            variant_name=TEST_VARIANT_1,
            initial_weight=TEST_VARIANT_1_WEIGHT,
        )
        variant2 = production_variant(
            model_name=MODEL_NAME,
            instance_type=DEFAULT_INSTANCE_TYPE,
            initial_instance_count=DEFAULT_INSTANCE_COUNT,
            variant_name=TEST_VARIANT_2,
            initial_weight=TEST_VARIANT_2_WEIGHT,
        )
        sagemaker_session.endpoint_from_production_variants(
            name=multi_variant_endpoint.endpoint_name,
            production_variants=[variant1, variant2])

        # Yield to run the integration tests
        yield multi_variant_endpoint

        # Cleanup resources
        sagemaker_session.delete_model(multi_variant_endpoint_model)
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=multi_variant_endpoint.endpoint_name)

    # Validate resource cleanup. Keep the assertions outside the raising calls
    # so they actually execute, and use one pytest.raises block per call.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(
            ModelName=multi_variant_endpoint_model.name)
    assert "Could not find model" in str(exception.value)
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=multi_variant_endpoint.endpoint_name)
    assert "Could not find endpoint" in str(exception.value)
Example #12
from sagemaker.s3 import S3Uploader
from util import data_io
from util.util_methods import exec_command
import os

if __name__ == '__main__':
    local_path = f"{os.environ['HOME']}/data"
    data_io.download_data("https://s3.amazonaws.com/research.metamind.io/wikitext",
                          "wikitext-2-raw-v1.zip",
                          local_path,
                          unzip_it=True,
                          remove_zipped=True)
    folder_name = "wikitext-2-raw-v1"
    file_to_upload = f"/tmp/{folder_name}.tar.gz"
    exec_command(f"cd {local_path} && tar -czvf {file_to_upload} {folder_name}")
    s3_prefix = "s3://tilos-ml-bucket/wikitext-2-raw-v1"
    S3Uploader.upload(file_to_upload, s3_prefix)
Example #13
def sagemaker_training_run(args,
                           config: SageMakerTrainingConfig,
                           metrics=None):
    if os.getenv('SM_TRAINING_ENV', None):
        warnings.warn(
            "Trying to start a SageMaker container from a SageMaker container. Possible loop detected."
        )

    if metrics is None:
        metrics = {}
    session = sagemaker_session(profile_name=args.sagemaker_profile)
    image_uri = ecr_ensure_image(image=Image(
        path=args.sagemaker_training_image,
        tag=args.sagemaker_training_image,
        accounts=args.sagemaker_training_image.split(",")),
                                 session=session.boto_session)
    script = args.sagemaker_script
    script = os.path.abspath(script)
    source = args.sagemaker_source
    if not source:
        source = os.path.dirname(script)
    if not script.startswith(source):
        raise ValueError("script=[{}] must be in source=[{}]")
    entry_point = script[len(source) + 1:]
    entry_point = entry_point.replace("\\", "/")
    print(f"Source: {source}, entry_point: {entry_point}")
    metric_definitions = [{'Name': k, 'Regex': v} for k, v in metrics.items()]
    dependencies = [getattr(args, k) for k in config.dependencies.keys()]
    print("Dependencies: {}".format(dependencies))

    # checkpoint_local_path='/opt/ml/checkpoints/'
    bucket = session.default_bucket()
    if args.sagemaker_job_name and args.sagemaker_job_name.strip():
        job_name = args.sagemaker_job_name
    else:
        job_name = name_from_base(args.sagemaker_base_job_name)
    tags = git_get_tags(script)
    tags["Source"] = 'aws-sagemaker-remote'
    tags["JobName"] = job_name
    tags["BaseJobName"] = args.sagemaker_base_job_name
    tags = make_tags(tags)
    #checkpoint_s3_uri = 's3://{}/{}/checkpoints'.format(bucket, job_name)
    input_prefix = "s3://{}/{}/inputs".format(bucket, job_name)
    iam = session.boto_session.client('iam')
    training_role = ensure_training_role(
        iam=iam, role_name=args.sagemaker_training_role)
    hyperparameters = {
        k.replace('_', '-'): str(v)
        for k, v in vars(args).items() if v is not None and len(str(v)) > 0
    }
    hyperparameters['sagemaker-run'] = 'False'
    if args.sagemaker_checkpoint_s3 and args.sagemaker_checkpoint_s3 != 'default':
        if not args.sagemaker_checkpoint_s3.startswith('s3://'):
            raise ValueError(
                "--sagemaker-checkpoint-s3 must be an S3 URI (s3://...) or \"default\""
            )
        checkpoint_s3 = args.sagemaker_checkpoint_s3
    else:
        checkpoint_s3 = "s3://{}/{}/checkpoints".format(bucket, job_name)
    hyperparameters['checkpoint-dir'] = args.sagemaker_checkpoint_container
    # Initial checkpoint
    if args.checkpoint_initial:
        if args.checkpoint_initial.startswith("s3://"):
            copy_s3(args.checkpoint_initial, checkpoint_s3,
                    session.boto_session.client('s3'))
        else:
            S3Uploader.upload(local_path=args.checkpoint_initial,
                              desired_s3_uri=checkpoint_s3,
                              sagemaker_session=session)

    if 'sagemaker-job-name' in hyperparameters:
        del hyperparameters['sagemaker-job-name']

    s3 = session.boto_session.client('s3')
    channels = config.inputs
    channels = process_channels(channels,
                                args=args,
                                session=session,
                                prefix=input_prefix)
    training_inputs = build_training_inputs(channels=channels, args=args)
    set_suffixes(channels=channels,
                 session=session,
                 hyperparameters=hyperparameters)
    print("Hyperparameters: {}".format(hyperparameters))

    if not training_inputs:
        training_inputs = None
    else:
        print("training_inputs: {}".format(list(training_inputs.keys())))
    #import pprint
    #pprint.pprint({k: v.config for k, v in channels.items()})
    #env = config.env

    estimator = PyTorch(
        sagemaker_session=session,
        base_job_name=args.sagemaker_base_job_name,
        entry_point=entry_point,
        source_dir=source,
        role=training_role,
        instance_type=args.sagemaker_training_instance,
        image_uri=image_uri,
        instance_count=1,
        framework_version='1.5.0',
        # hyperparameters=hyperparameters_from_argparse(vars(args)),
        metric_definitions=metric_definitions,
        dependencies=dependencies,
        checkpoint_s3_uri=checkpoint_s3,
        checkpoint_local_path=args.sagemaker_checkpoint_container,
        use_spot_instances=args.sagemaker_spot_instances,
        hyperparameters=hyperparameters,
        volume_size=args.sagemaker_volume_size,
        tags=tags,
        max_wait=args.sagemaker_max_wait
        if args.sagemaker_spot_instances else None,
        max_run=args.sagemaker_max_run)

    if args.sagemaker_experiment_name:
        sagemaker_client = session.boto_session.client('sagemaker')
        ensure_experiment(client=sagemaker_client,
                          experiment_name=args.sagemaker_experiment_name)
        experiment_config = {"ExperimentName": args.sagemaker_experiment_name}
        if args.sagemaker_trial_name:
            experiment_config["TrialName"] = args.sagemaker_trial_name
    else:
        if args.sagemaker_trial_name:
            raise ValueError(
                "If `sagemaker_trial_name` is provided, `sagemaker_experiment_name` must be provided as well"
            )
        experiment_config = None

    estimator.fit(training_inputs,
                  job_name=job_name,
                  wait=False,
                  experiment_config=experiment_config)
    job = estimator.latest_training_job
    if args.sagemaker_output_json:
        obj = job.describe()
        #print("Describe: {}".format(obj))
        os.makedirs(os.path.dirname(os.path.abspath(
            args.sagemaker_output_json)),
                    exist_ok=True)
        with open(args.sagemaker_output_json, 'w') as f:
            json.dump(obj, f, default=json_converter, indent=4)

    if args.sagemaker_wait:
        job.wait(logs=True)  # args.sagemaker_logs)
    # todo:
    # use_spot_instances
    # experiment_config (dict[str, str]): Experiment management configuration.
    #            Dictionary contains three optional keys,
    #            'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
    return estimator
Example #14
    def _stage_submit_deps(self, submit_deps, input_channel_name):
        """Prepares a list of paths to jars, py-files, or files dependencies.

        This prepared list of paths is provided as `spark-submit` options.
        The submit_deps list may include a combination of S3 URIs and local paths.
        Any S3 URIs are appended to the `spark-submit` option value without modification.
        Any local file paths are copied to a temp directory, uploaded to a default S3 URI,
        and included as a ProcessingInput channel to provide as local files to the SageMaker
        Spark container.

        :param submit_deps (list[str]): List of one or more dependency paths to include.
        :param input_channel_name (str): The `spark-submit` option name associated with
                    the input channel.
        :return (Optional[ProcessingInput], str): Tuple of (left) optional ProcessingInput
                    for the input channel, and (right) comma-delimited value for
                    `spark-submit` option.
        """
        if not submit_deps:
            raise ValueError(
                f"submit_deps value may not be empty. {self._submit_deps_error_message}"
            )
        if not input_channel_name:
            raise ValueError("input_channel_name value may not be empty.")

        input_channel_s3_uri = (
            f"s3://{self.sagemaker_session.default_bucket()}"
            f"/{self._current_job_name}/input/{input_channel_name}")

        use_input_channel = False
        spark_opt_s3_uris = []

        with tempfile.TemporaryDirectory() as tmpdir:
            for dep_path in submit_deps:
                dep_url = urlparse(dep_path)
                # S3 URIs are included as-is in the spark-submit argument
                if dep_url.scheme in ["s3", "s3a"]:
                    spark_opt_s3_uris.append(dep_path)
                # Local files are copied to temp directory to be uploaded to S3
                elif not dep_url.scheme or dep_url.scheme == "file":
                    if not os.path.isfile(dep_path):
                        raise ValueError(
                            f"submit_deps path {dep_path} is not a valid local file. "
                            f"{self._submit_deps_error_message}")
                    logger.info(
                        "Copying dependency from local path %s to tmpdir %s",
                        dep_path, tmpdir)
                    shutil.copy(dep_path, tmpdir)
                else:
                    raise ValueError(
                        f"submit_deps path {dep_path} references unsupported filesystem "
                        f"scheme: {dep_url.scheme} {self._submit_deps_error_message}"
                    )

            # If any local files were found and copied, upload the temp directory to S3
            if os.listdir(tmpdir):
                logger.info("Uploading dependencies from tmpdir %s to S3 %s",
                            tmpdir, input_channel_s3_uri)
                S3Uploader.upload(
                    local_path=tmpdir,
                    desired_s3_uri=input_channel_s3_uri,
                    sagemaker_session=self.sagemaker_session,
                )
                use_input_channel = True

        # If any local files were uploaded, construct a ProcessingInput to provide
        # them to the Spark container and form the spark-submit option from a
        # combination of S3 URIs and container's local input path
        if use_input_channel:
            input_channel = ProcessingInput(
                source=input_channel_s3_uri,
                destination=
                f"{self._conf_container_base_path}{input_channel_name}",
                input_name=input_channel_name,
            )
            spark_opt = ",".join(spark_opt_s3_uris +
                                 [input_channel.destination])
        # If no local files were uploaded, form the spark-submit option from a list of S3 URIs
        else:
            input_channel = None
            spark_opt = ",".join(spark_opt_s3_uris)

        return input_channel, spark_opt
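A hedged illustration of the tuple described in the docstring, for a mix of S3 and local dependencies; paths and names are placeholders:

submit_deps = ["s3://my-bucket/deps/lib-a.jar", "/home/user/lib-b.jar"]
# _stage_submit_deps(submit_deps, "jars") uploads lib-b.jar and returns roughly:
#   input_channel.source      -> s3://<default-bucket>/<job-name>/input/jars
#   input_channel.destination -> <conf-container-base-path>jars
#   spark_opt                 -> "s3://my-bucket/deps/lib-a.jar,<destination>"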
Example #15
    @classmethod
    def _write_to_remote_storage(cls, local, remote):
        # Currently, supports Amazon S3 exclusively
        S3Uploader.upload(local, remote)
Example #16
def test_model_registration_with_drift_check_baselines(
    sagemaker_session,
    role,
    pipeline_name,
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    # upload model data to s3
    model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
    model_base_uri = "s3://{}/{}/input/model/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("model"),
    )
    model_uri = S3Uploader.upload(model_local_path,
                                  model_base_uri,
                                  sagemaker_session=sagemaker_session)
    model_uri_param = ParameterString(name="model_uri",
                                      default_value=model_uri)

    # upload metrics to s3
    metrics_data = (
        '{"regression_metrics": {"mse": {"value": 4.925353410353891, '
        '"standard_deviation": 2.219186917819692}}}')
    metrics_base_uri = "s3://{}/{}/input/metrics/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("metrics"),
    )
    metrics_uri = S3Uploader.upload_string_as_file_body(
        body=metrics_data,
        desired_s3_uri=metrics_base_uri,
        sagemaker_session=sagemaker_session,
    )
    metrics_uri_param = ParameterString(name="metrics_uri",
                                        default_value=metrics_uri)

    model_metrics = ModelMetrics(
        bias=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    drift_check_baselines = DriftCheckBaselines(
        model_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    customer_metadata_properties = {"key1": "value1"}
    estimator = XGBoost(
        entry_point="training.py",
        source_dir=os.path.join(DATA_DIR, "sip"),
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )
    step_register = RegisterModel(
        name="MyRegisterModelStep",
        estimator=estimator,
        model_data=model_uri_param,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="testModelPackageGroup",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        customer_metadata_properties=customer_metadata_properties,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            model_uri_param,
            metrics_uri_param,
            instance_type,
            instance_count,
        ],
        steps=[step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
                max_retry_count=5,
                exception_message_prefix=
                "Waiting for a successful execution of pipeline",
                seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={
                "model_uri": model_uri,
                "metrics_uri": metrics_uri
            })
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            if failure_reason != "":
                logging.error(
                    f"Pipeline execution failed with error: {failure_reason}."
                    " Retrying..")
                continue
            assert execution_steps[0]["StepStatus"] == "Succeeded"
            assert execution_steps[0]["StepName"] == "MyRegisterModelStep"

            response = sagemaker_session.sagemaker_client.describe_model_package(
                ModelPackageName=execution_steps[0]["Metadata"]
                ["RegisterModel"]["Arn"])

            assert (response["ModelMetrics"]["Explainability"]["Report"]
                    ["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["Bias"][
                "PreTrainingConstraints"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["Explainability"]
                    ["Constraints"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["ModelQuality"]
                    ["Statistics"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["ModelDataQuality"]
                    ["Statistics"]["ContentType"] == "application/json")
            assert response[
                "CustomerMetadataProperties"] == customer_metadata_properties
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #17
    @classmethod
    def _write_to_remote_storage(cls, local, remote):
        # Currently, only supports Amazon S3
        S3Uploader.upload(local, remote)
Example #18
# The name used for the project which is used for things like S3 bucket location prefix
project_name = 'customer-churn-wx-' + dateAsString
# The name used when the Model is created
model_name = 'customer-churn-model-wx-' + dateAsString
training_job_name = "CustomerChurnTrainingJob-wx-" + dateAsString

# specify the roles that will be used by the various artifacts
workflow_execution_role = os.getenv('workflow_execution_role')
sagemaker_execution_role = os.getenv('sagemaker_execution_role')
glue_role = os.getenv('glue_role')
lambda_role = os.getenv('lambda_role')
registry_lambda_role = os.getenv('model_registry_lambda_role')


data_source = S3Uploader.upload(local_path='./data/customer-churn.csv',
                                desired_s3_uri='s3://{}/{}'.format(bucket, project_name),
                                session=session)

train_prefix = 'train'
val_prefix = 'validation'

train_data = 's3://{}/{}/{}/'.format(bucket, project_name, train_prefix)
validation_data = 's3://{}/{}/{}/'.format(bucket, project_name, val_prefix)


glue_script_location = S3Uploader.upload(local_path='./code/glue_etl.py',
                                         desired_s3_uri='s3://{}/{}'.format(bucket, project_name),
                                         session=session)

glue_client = boto3.client('glue')
def save_model(net, output_file):
    """This method saves the model to file"""
    file_name = "net.params"  # local version
    net.save_parameters(file_name)
    S3Uploader.upload(file_name, output_file)
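A hedged usage sketch for save_model, assuming an MXNet Gluon network; the layer and S3 URI are placeholders:

from mxnet.gluon import nn

net = nn.Dense(10, in_units=4)  # fixed in_units so parameters are allocated eagerly
net.initialize()
save_model(net, "s3://my-bucket/models/net.params")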