def test_upload(sagemaker_session):
    desired_s3_uri = os.path.join("s3://", BUCKET_NAME, CURRENT_JOB_NAME, SOURCE_NAME)
    S3Uploader.upload(
        local_path="/path/to/app.jar",
        desired_s3_uri=desired_s3_uri,
        session=sagemaker_session,
    )
    sagemaker_session.upload_data.assert_called_with(
        path="/path/to/app.jar",
        bucket=BUCKET_NAME,
        key_prefix=os.path.join(CURRENT_JOB_NAME, SOURCE_NAME),
        extra_args=None,
    )
def upload_to_s3(local_path, s3_data_location, *, search=None):
    import os

    from tqdm import tqdm
    from sagemaker.s3 import S3Uploader as s3up

    for root, dirs, files in os.walk(local_path):
        if len(files) > 0:
            idx = len(local_path)
            for name in tqdm(files, desc=f"Uploading folder '{root}'"):
                file_path = os.path.join(root, name)
                s3_path = os.path.join(s3_data_location, root[idx:])
                s3up.upload(file_path, s3_path)
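# Hypothetical usage sketch for upload_to_s3 (bucket and paths are illustrative, not from
# the original). Note: the S3 key is built from root[idx:], so local_path should end with
# a trailing separator so the per-folder suffix stays relative (e.g. "train", not "/train").
upload_to_s3("/home/user/dataset/", "s3://my-bucket/datasets/dataset")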
def test_upload(sagemaker_session, caplog):
    desired_s3_uri = os.path.join("s3://", BUCKET_NAME, CURRENT_JOB_NAME, SOURCE_NAME)
    S3Uploader.upload(
        local_path="/path/to/app.jar",
        desired_s3_uri=desired_s3_uri,
        session=sagemaker_session,
    )
    sagemaker_session.upload_data.assert_called_with(
        path="/path/to/app.jar",
        bucket=BUCKET_NAME,
        key_prefix=os.path.join(CURRENT_JOB_NAME, SOURCE_NAME),
        extra_args=None,
    )
    warning_message = (
        "Parameter 'session' will be renamed to 'sagemaker_session' "
        "in SageMaker Python SDK v2."
    )
    assert warning_message in caplog.text
def sagemaker_processing_handle(args, config, main):
    if args.sagemaker_run:
        # Remote processing
        sagemaker_processing_run(args=args, config=config)
    else:
        # Local processing
        args, tmps, uris = sagemaker_processing_local_args(args=args, config=config)
        try:
            main(args)
            if tmps:
                session = sagemaker_session(profile_name=args.sagemaker_profile)
                for k in tmps.keys():
                    S3Uploader.upload(
                        local_path=getattr(args, k),
                        desired_s3_uri=uris[k],
                        sagemaker_session=session,
                    )
        finally:
            # Close any temporary-directory context managers; __exit__ requires the
            # three exception arguments even when there is no exception.
            for tmp in tmps.values():
                tmp.__exit__(None, None, None)
def upload(src, dst, gz, session: sagemaker.Session, root='.'):
    dst = cli_argument(dst, session=session)
    if not os.path.exists(src):
        raise click.UsageError("Source must exist")
    if not dst.startswith('s3://'):
        if dst.startswith('/'):
            dst = dst[1:]
        bucket = session.default_bucket()
        dst = 's3://{}/{}'.format(bucket, dst)
    url = urlparse(dst)
    assert url.scheme == 's3'
    bucket = url.netloc
    key = url.path
    if key.startswith('/'):
        key = key[1:]
    if os.path.isfile(src):
        if gz:
            raise click.UsageError("Option gz is only valid for source directories")
        s3 = session.boto_session.client('s3')
        s3.upload_file(src, bucket, key)
    elif os.path.isdir(src):
        if gz:
            # Require a .tar.gz or .tgz destination (the original pattern contained an
            # empty alternative, "tar\.gz||tgz", which matched almost anything).
            if not re.match(r".*\.(tar\.gz|tgz)$", dst, re.IGNORECASE):
                raise click.UsageError("Destination should end in .tar.gz or .tgz")
            s3_dst = os.path.dirname(dst)
            file_name = os.path.basename(dst)
            with _tmpdir() as tmp:
                p = os.path.join(tmp, file_name)
                with tarfile.open(p, 'w:gz') as arc:
                    arc.add(name=src, arcname=root, recursive=True)
                s3 = session.boto_session.client('s3')
                s3.upload_file(p, bucket, key)
        else:
            S3Uploader.upload(local_path=src, desired_s3_uri=dst, sagemaker_session=session)
    else:
        raise click.UsageError("Source must be file or directory")
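# Hypothetical usage sketch for upload(); the file name and key below are illustrative only.
# Uploads one local file to s3://<default-bucket>/artifacts/model.tar.gz.
upload(src="model.tar.gz", dst="artifacts/model.tar.gz", gz=False,
       session=sagemaker.Session())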
def upload_local_channel(channel, session, s3_uri):
    url = urlparse(channel)
    if url.scheme == 's3':
        return channel
    elif url.scheme == 'file':
        path = url2pathname(url.path)
        S3Uploader.upload(
            local_path=path,
            desired_s3_uri=s3_uri,
            sagemaker_session=session
        )
        if os.path.isfile(path):
            # todo: urljoin
            s3_uri = "{}/{}".format(s3_uri, os.path.basename(path))
        print("Uploaded [{}] ([{}]) to [{}]".format(channel, path, s3_uri))
        return s3_uri
    else:
        print("Type {}".format(type(s3_uri)))
        raise ValueError("Unknown scheme: [{}] (uri: {})".format(url.scheme, channel))
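# Hypothetical usage sketch for upload_local_channel; the bucket, path, and `session`
# object are illustrative only. An s3:// channel is returned unchanged; a file:// channel
# is uploaded first and the returned URI points at the uploaded object.
train_channel = upload_local_channel(
    "file:///home/user/train.csv", session, "s3://my-bucket/channels/train"
)
# -> "s3://my-bucket/channels/train/train.csv"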
def dataset(sagemaker_session):
    dataset_local_path = os.path.join(DATA_DIR, "pipeline/clarify_check_step/dataset.csv")
    dataset_s3_uri = "s3://{}/{}/{}/{}/{}".format(
        sagemaker_session.default_bucket(),
        "clarify_check_step",
        "input",
        "dataset",
        utils.unique_name_from_base("dataset"),
    )
    return S3Uploader.upload(dataset_local_path, dataset_s3_uri, sagemaker_session=sagemaker_session)
def _inject_repack_script(self):
    """Injects the _repack_model.py script where it belongs.

    If the source_dir is an S3 path:
        1) downloads the source_dir tar.gz
        2) copies the _repack_model.py script where it belongs
        3) uploads the mutated source_dir

    If the source_dir is a local path:
        1) copies the _repack_model.py script into the source dir
    """
    fname = os.path.join(os.path.dirname(__file__), REPACK_SCRIPT)
    if self._source_dir.lower().startswith("s3://"):
        with tempfile.TemporaryDirectory() as tmp:
            local_path = os.path.join(tmp, "local.tar.gz")

            S3Downloader.download(
                s3_uri=self._source_dir,
                local_path=local_path,
                sagemaker_session=self._estimator.sagemaker_session,
            )

            src_dir = os.path.join(tmp, "src")
            with tarfile.open(name=local_path, mode="r:gz") as tf:
                tf.extractall(path=src_dir)

            shutil.copy2(fname, os.path.join(src_dir, REPACK_SCRIPT))
            with tarfile.open(name=local_path, mode="w:gz") as tf:
                tf.add(src_dir, arcname=".")

            S3Uploader.upload(
                local_path=local_path,
                desired_s3_uri=self._source_dir,
                sagemaker_session=self._estimator.sagemaker_session,
            )
    else:
        shutil.copy2(fname, os.path.join(self._source_dir, REPACK_SCRIPT))
def _normalize_inputs(self, inputs=None):
    """Ensures that all the ``ProcessingInput`` objects have names and S3 URIs.

    Args:
        inputs (list[sagemaker.processing.ProcessingInput]): A list of
            ``ProcessingInput`` objects to be normalized (default: None).
            If not specified, an empty list is returned.

    Returns:
        list[sagemaker.processing.ProcessingInput]: The list of normalized
            ``ProcessingInput`` objects.

    Raises:
        TypeError: if the inputs are not ``ProcessingInput`` objects.
    """
    # Initialize a list of normalized ProcessingInput objects.
    normalized_inputs = []
    if inputs is not None:
        # Iterate through the provided list of inputs.
        for count, file_input in enumerate(inputs, 1):
            if not isinstance(file_input, ProcessingInput):
                raise TypeError("Your inputs must be provided as ProcessingInput objects.")
            # Generate a name for the ProcessingInput if it doesn't have one.
            if file_input.input_name is None:
                file_input.input_name = "input-{}".format(count)
            # If the source is a local path, upload it to S3
            # and save the S3 uri in the ProcessingInput source.
            parse_result = urlparse(file_input.source)
            if parse_result.scheme != "s3":
                desired_s3_uri = os.path.join(
                    "s3://",
                    self.sagemaker_session.default_bucket(),
                    self._current_job_name,
                    "input",
                    file_input.input_name,
                )
                s3_uri = S3Uploader.upload(
                    local_path=file_input.source,
                    desired_s3_uri=desired_s3_uri,
                    session=self.sagemaker_session,
                )
                file_input.source = s3_uri
            normalized_inputs.append(file_input)
    return normalized_inputs
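# Hypothetical usage sketch; `processor` and the local path are illustrative only, and the
# call simply demonstrates the normalization behavior described in the docstring above.
normalized = processor._normalize_inputs(
    [ProcessingInput(source="/home/user/data.csv", destination="/opt/ml/processing/input")]
)
# normalized[0].input_name == "input-1"; normalized[0].source is now an s3:// URI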
def _upload_code(self, code):
    """Uploads a code file or directory specified as a string and returns the S3 URI.

    Args:
        code (str): A file or directory to be uploaded to S3.

    Returns:
        str: The S3 URI of the uploaded file or directory.
    """
    desired_s3_uri = "s3://{}/{}/input/{}".format(
        self.sagemaker_session.default_bucket(),
        self._current_job_name,
        self._CODE_CONTAINER_INPUT_NAME,
    )
    return S3Uploader.upload(
        local_path=code,
        desired_s3_uri=desired_s3_uri,
        session=self.sagemaker_session,
    )
def multi_variant_endpoint(sagemaker_session):
    """
    Sets up the multi variant endpoint before the integration tests run.

    Cleans up the multi variant endpoint after the integration tests run.
    """
    multi_variant_endpoint.endpoint_name = unique_name_from_base(
        "integ-test-multi-variant-endpoint"
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
        endpoint_name=multi_variant_endpoint.endpoint_name,
        sagemaker_session=sagemaker_session,
        hours=2,
    ):
        # Creating a model
        bucket = sagemaker_session.default_bucket()
        prefix = "sagemaker/DEMO-VariantTargeting"
        model_url = S3Uploader.upload(
            local_path=XG_BOOST_MODEL_LOCAL_PATH,
            desired_s3_uri="s3://" + bucket + "/" + prefix,
            session=sagemaker_session,
        )

        image_uri = get_image_uri(sagemaker_session.boto_session.region_name, "xgboost", "0.90-1")

        multi_variant_endpoint_model = sagemaker_session.create_model(
            name=MODEL_NAME,
            role=ROLE,
            container_defs={"Image": image_uri, "ModelDataUrl": model_url},
        )

        # Creating a multi variant endpoint
        variant1 = production_variant(
            model_name=MODEL_NAME,
            instance_type=DEFAULT_INSTANCE_TYPE,
            initial_instance_count=DEFAULT_INSTANCE_COUNT,
            variant_name=TEST_VARIANT_1,
            initial_weight=TEST_VARIANT_1_WEIGHT,
        )
        variant2 = production_variant(
            model_name=MODEL_NAME,
            instance_type=DEFAULT_INSTANCE_TYPE,
            initial_instance_count=DEFAULT_INSTANCE_COUNT,
            variant_name=TEST_VARIANT_2,
            initial_weight=TEST_VARIANT_2_WEIGHT,
        )
        sagemaker_session.endpoint_from_production_variants(
            name=multi_variant_endpoint.endpoint_name,
            production_variants=[variant1, variant2],
        )

        # Yield to run the integration tests
        yield multi_variant_endpoint

        # Cleanup resources
        sagemaker_session.delete_model(multi_variant_endpoint_model)
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=multi_variant_endpoint.endpoint_name
        )

        # Validate resource cleanup
        with pytest.raises(Exception) as exception:
            sagemaker_session.sagemaker_client.describe_model(
                ModelName=multi_variant_endpoint_model.name
            )
            assert "Could not find model" in str(exception.value)

            sagemaker_session.sagemaker_client.describe_endpoint_config(
                name=multi_variant_endpoint.endpoint_name
            )
            assert "Could not find endpoint" in str(exception.value)
from sagemaker.s3 import S3Uploader
from util import data_io
from util.util_methods import exec_command
import os

if __name__ == '__main__':
    local_path = f"{os.environ['HOME']}/data"
    data_io.download_data(
        "https://s3.amazonaws.com/research.metamind.io/wikitext",
        "wikitext-2-raw-v1.zip",
        local_path,
        unzip_it=True,
        remove_zipped=True,
    )
    folder_name = "wikitext-2-raw-v1"
    file_to_upload = f"/tmp/{folder_name}.tar.gz"
    exec_command(f"cd {local_path} && tar -czvf {file_to_upload} {folder_name}")
    s3_prefix = "s3://tilos-ml-bucket/wikitext-2-raw-v1"
    S3Uploader.upload(file_to_upload, s3_prefix)
def sagemaker_training_run(args, config: SageMakerTrainingConfig, metrics=None):
    if os.getenv('SM_TRAINING_ENV', None):
        warnings.warn(
            "Trying to start a SageMaker container from a SageMaker container. Possible loop detected."
        )
    if metrics is None:
        metrics = {}
    session = sagemaker_session(profile_name=args.sagemaker_profile)
    image_uri = ecr_ensure_image(
        image=Image(
            path=args.sagemaker_training_image,
            tag=args.sagemaker_training_image,
            accounts=args.sagemaker_training_image.split(",")),
        session=session.boto_session)
    script = args.sagemaker_script
    script = os.path.abspath(script)
    source = args.sagemaker_source
    if not source:
        source = os.path.dirname(script)
    if not script.startswith(source):
        raise ValueError("script=[{}] must be in source=[{}]".format(script, source))
    entry_point = script[len(source) + 1:]
    entry_point = entry_point.replace("\\", "/")
    print(f"Source: {source}, entry_point: {entry_point}")
    metric_definitions = [{'Name': k, 'Regex': v} for k, v in metrics.items()]
    dependencies = [getattr(args, k) for k in config.dependencies.keys()]
    print("Dependencies: {}".format(dependencies))
    # checkpoint_local_path='/opt/ml/checkpoints/'
    bucket = session.default_bucket()
    if args.sagemaker_job_name and args.sagemaker_job_name.strip():
        job_name = args.sagemaker_job_name
    else:
        job_name = name_from_base(args.sagemaker_base_job_name)
    tags = git_get_tags(script)
    tags["Source"] = 'aws-sagemaker-remote'
    tags["JobName"] = job_name
    tags["BaseJobName"] = args.sagemaker_base_job_name
    tags = make_tags(tags)
    # checkpoint_s3_uri = 's3://{}/{}/checkpoints'.format(bucket, job_name)
    input_prefix = "s3://{}/{}/inputs".format(bucket, job_name)
    iam = session.boto_session.client('iam')
    training_role = ensure_training_role(
        iam=iam, role_name=args.sagemaker_training_role)
    hyperparameters = {
        k.replace('_', '-'): str(v)
        for k, v in vars(args).items()
        if v is not None and len(str(v)) > 0
    }
    hyperparameters['sagemaker-run'] = 'False'
    if args.sagemaker_checkpoint_s3 and args.sagemaker_checkpoint_s3 != 'default':
        if not args.sagemaker_checkpoint_s3.startswith('s3://'):
            raise ValueError(
                "--sagemaker-checkpoint-s3 must be an S3 URI (s3://...) or \"default\"")
        checkpoint_s3 = args.sagemaker_checkpoint_s3
    else:
        checkpoint_s3 = "s3://{}/{}/checkpoints".format(bucket, job_name)
    hyperparameters['checkpoint-dir'] = args.sagemaker_checkpoint_container
    # Initial checkpoint
    if args.checkpoint_initial:
        if args.checkpoint_initial.startswith("s3://"):
            copy_s3(args.checkpoint_initial, checkpoint_s3,
                    session.boto_session.client('s3'))
        else:
            S3Uploader.upload(
                local_path=args.checkpoint_initial,
                desired_s3_uri=checkpoint_s3,
                sagemaker_session=session)
    if 'sagemaker-job-name' in hyperparameters:
        del hyperparameters['sagemaker-job-name']
    s3 = session.boto_session.client('s3')
    channels = config.inputs
    channels = process_channels(channels, args=args, session=session, prefix=input_prefix)
    training_inputs = build_training_inputs(channels=channels, args=args)
    set_suffixes(channels=channels, session=session, hyperparameters=hyperparameters)
    print("Hyperparameters: {}".format(hyperparameters))
    if not training_inputs:
        training_inputs = None
    else:
        print("training_inputs: {}".format(list(training_inputs.keys())))
    # import pprint
    # pprint.pprint({k: v.config for k, v in channels.items()})
    # env = config.env
    estimator = PyTorch(
        sagemaker_session=session,
        base_job_name=args.sagemaker_base_job_name,
        entry_point=entry_point,
        source_dir=source,
        role=training_role,
        instance_type=args.sagemaker_training_instance,
        image_uri=image_uri,
        instance_count=1,
        framework_version='1.5.0',
        # hyperparameters=hyperparameters_from_argparse(vars(args)),
        metric_definitions=metric_definitions,
        dependencies=dependencies,
        checkpoint_s3_uri=checkpoint_s3,
        checkpoint_local_path=args.sagemaker_checkpoint_container,
        use_spot_instances=args.sagemaker_spot_instances,
        hyperparameters=hyperparameters,
        volume_size=args.sagemaker_volume_size,
        tags=tags,
        max_wait=args.sagemaker_max_wait if args.sagemaker_spot_instances else None,
        max_run=args.sagemaker_max_run)
    if args.sagemaker_experiment_name:
        sagemaker_client = session.boto_session.client('sagemaker')
        ensure_experiment(client=sagemaker_client,
                          experiment_name=args.sagemaker_experiment_name)
        experiment_config = {"ExperimentName": args.sagemaker_experiment_name}
        if args.sagemaker_trial_name:
            experiment_config["TrialName"] = args.sagemaker_trial_name
    else:
        if args.sagemaker_trial_name:
            raise ValueError(
                "If `sagemaker_trial_name` is provided, `sagemaker_experiment_name` must be provided as well"
            )
        experiment_config = None
    estimator.fit(training_inputs, job_name=job_name, wait=False,
                  experiment_config=experiment_config)
    job = estimator.latest_training_job
    if args.sagemaker_output_json:
        obj = job.describe()
        # print("Describe: {}".format(obj))
        os.makedirs(os.path.dirname(os.path.abspath(args.sagemaker_output_json)), exist_ok=True)
        with open(args.sagemaker_output_json, 'w') as f:
            json.dump(obj, f, default=json_converter, indent=4)
    if args.sagemaker_wait:
        job.wait(logs=True)  # args.sagemaker_logs)
    # todo:
    # use_spot_instances
    # experiment_config (dict[str, str]): Experiment management configuration.
    # Dictionary contains three optional keys,
    # 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
    return estimator
def _stage_submit_deps(self, submit_deps, input_channel_name):
    """Prepares a list of paths to jars, py-files, or files dependencies.

    This prepared list of paths is provided as `spark-submit` options.
    The submit_deps list may include a combination of S3 URIs and local paths.
    Any S3 URIs are appended to the `spark-submit` option value without modification.
    Any local file paths are copied to a temp directory, uploaded to a default S3 URI,
    and included as a ProcessingInput channel to provide as local files to the SageMaker
    Spark container.

    :param submit_deps (list[str]): List of one or more dependency paths to include.
    :param input_channel_name (str): The `spark-submit` option name associated with the input channel.
    :return (Optional[ProcessingInput], str): Tuple of (left) optional ProcessingInput for the
        input channel, and (right) comma-delimited value for `spark-submit` option.
    """
    if not submit_deps:
        raise ValueError(
            f"submit_deps value may not be empty. {self._submit_deps_error_message}"
        )
    if not input_channel_name:
        raise ValueError("input_channel_name value may not be empty.")

    input_channel_s3_uri = (
        f"s3://{self.sagemaker_session.default_bucket()}"
        f"/{self._current_job_name}/input/{input_channel_name}"
    )
    use_input_channel = False
    spark_opt_s3_uris = []

    with tempfile.TemporaryDirectory() as tmpdir:
        for dep_path in submit_deps:
            dep_url = urlparse(dep_path)
            # S3 URIs are included as-is in the spark-submit argument
            if dep_url.scheme in ["s3", "s3a"]:
                spark_opt_s3_uris.append(dep_path)
            # Local files are copied to temp directory to be uploaded to S3
            elif not dep_url.scheme or dep_url.scheme == "file":
                if not os.path.isfile(dep_path):
                    raise ValueError(
                        f"submit_deps path {dep_path} is not a valid local file. "
                        f"{self._submit_deps_error_message}"
                    )
                logger.info(
                    "Copying dependency from local path %s to tmpdir %s", dep_path, tmpdir
                )
                shutil.copy(dep_path, tmpdir)
            else:
                raise ValueError(
                    f"submit_deps path {dep_path} references unsupported filesystem "
                    f"scheme: {dep_url.scheme} {self._submit_deps_error_message}"
                )

        # If any local files were found and copied, upload the temp directory to S3
        if os.listdir(tmpdir):
            logger.info(
                "Uploading dependencies from tmpdir %s to S3 %s", tmpdir, input_channel_s3_uri
            )
            S3Uploader.upload(
                local_path=tmpdir,
                desired_s3_uri=input_channel_s3_uri,
                sagemaker_session=self.sagemaker_session,
            )
            use_input_channel = True

    # If any local files were uploaded, construct a ProcessingInput to provide
    # them to the Spark container and form the spark-submit option from a
    # combination of S3 URIs and container's local input path
    if use_input_channel:
        input_channel = ProcessingInput(
            source=input_channel_s3_uri,
            destination=f"{self._conf_container_base_path}{input_channel_name}",
            input_name=input_channel_name,
        )
        spark_opt = ",".join(spark_opt_s3_uris + [input_channel.destination])
    # If no local files were uploaded, form the spark-submit option from a list of S3 URIs
    else:
        input_channel = None
        spark_opt = ",".join(spark_opt_s3_uris)

    return input_channel, spark_opt
def _write_to_remote_storage(cls, local, remote):
    # Currently, supports Amazon S3 exclusively
    S3Uploader.upload(local, remote)
def test_model_registration_with_drift_check_baselines(
    sagemaker_session,
    role,
    pipeline_name,
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

    # upload model data to s3
    model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
    model_base_uri = "s3://{}/{}/input/model/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("model"),
    )
    model_uri = S3Uploader.upload(
        model_local_path, model_base_uri, sagemaker_session=sagemaker_session
    )
    model_uri_param = ParameterString(name="model_uri", default_value=model_uri)

    # upload metrics to s3
    metrics_data = (
        '{"regression_metrics": {"mse": {"value": 4.925353410353891, '
        '"standard_deviation": 2.219186917819692}}}'
    )
    metrics_base_uri = "s3://{}/{}/input/metrics/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("metrics"),
    )
    metrics_uri = S3Uploader.upload_string_as_file_body(
        body=metrics_data,
        desired_s3_uri=metrics_base_uri,
        sagemaker_session=sagemaker_session,
    )
    metrics_uri_param = ParameterString(name="metrics_uri", default_value=metrics_uri)

    model_metrics = ModelMetrics(
        bias=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    drift_check_baselines = DriftCheckBaselines(
        model_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    customer_metadata_properties = {"key1": "value1"}
    estimator = XGBoost(
        entry_point="training.py",
        source_dir=os.path.join(DATA_DIR, "sip"),
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )
    step_register = RegisterModel(
        name="MyRegisterModelStep",
        estimator=estimator,
        model_data=model_uri_param,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="testModelPackageGroup",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        customer_metadata_properties=customer_metadata_properties,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            model_uri_param,
            metrics_uri_param,
            instance_type,
            instance_count,
        ],
        steps=[step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={"model_uri": model_uri, "metrics_uri": metrics_uri}
            )
            response = execution.describe()
            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            if failure_reason != "":
                logging.error(
                    f"Pipeline execution failed with error: {failure_reason}. Retrying.."
                )
                continue
            assert execution_steps[0]["StepStatus"] == "Succeeded"
            assert execution_steps[0]["StepName"] == "MyRegisterModelStep"

            response = sagemaker_session.sagemaker_client.describe_model_package(
                ModelPackageName=execution_steps[0]["Metadata"]["RegisterModel"]["Arn"]
            )

            assert (
                response["ModelMetrics"]["Explainability"]["Report"]["ContentType"]
                == "application/json"
            )
            assert (
                response["DriftCheckBaselines"]["Bias"]["PreTrainingConstraints"]["ContentType"]
                == "application/json"
            )
            assert (
                response["DriftCheckBaselines"]["Explainability"]["Constraints"]["ContentType"]
                == "application/json"
            )
            assert (
                response["DriftCheckBaselines"]["ModelQuality"]["Statistics"]["ContentType"]
                == "application/json"
            )
            assert (
                response["DriftCheckBaselines"]["ModelDataQuality"]["Statistics"]["ContentType"]
                == "application/json"
            )
            assert response["CustomerMetadataProperties"] == customer_metadata_properties
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def _write_to_remote_storage(cls, local, remote):
    # Currently, only supports Amazon S3
    S3Uploader.upload(local, remote)
# The name used for the project, which is used for things like the S3 bucket location prefix
project_name = 'customer-churn-wx-' + dateAsString

# The name used when the Model is created
model_name = 'customer-churn-model-wx-' + dateAsString

training_job_name = "CustomerChurnTrainingJob-wx-" + dateAsString

# specify the roles that will be used by the various artifacts
workflow_execution_role = os.getenv('workflow_execution_role')
sagemaker_execution_role = os.getenv('sagemaker_execution_role')
glue_role = os.getenv('glue_role')
lambda_role = os.getenv('lambda_role')
registry_lambda_role = os.getenv('model_registry_lambda_role')

data_source = S3Uploader.upload(
    local_path='./data/customer-churn.csv',
    desired_s3_uri='s3://{}/{}'.format(bucket, project_name),
    session=session,
)

train_prefix = 'train'
val_prefix = 'validation'

train_data = 's3://{}/{}/{}/'.format(bucket, project_name, train_prefix)
validation_data = 's3://{}/{}/{}/'.format(bucket, project_name, val_prefix)

glue_script_location = S3Uploader.upload(
    local_path='./code/glue_etl.py',
    desired_s3_uri='s3://{}/{}'.format(bucket, project_name),
    session=session,
)
glue_client = boto3.client('glue')
def save_model(net, output_file):
    """Saves the model parameters to a local file, then uploads them to S3."""
    file_name = "net.params"
    # local version
    net.save_parameters(file_name)
    S3Uploader.upload(file_name, output_file)
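# Hypothetical usage sketch for save_model; the bucket/prefix and the trained `net` object
# are illustrative only. Writes net.params locally, then uploads it under the S3 prefix.
save_model(net, "s3://my-bucket/models/gluon-example")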