def test_wait_with_logs(sagemaker_session):
    """wait() with default arguments should stream logs rather than poll wait_for_job."""
    job = _TrainingJob(sagemaker_session, JOB_NAME)
    job.wait()

    sagemaker_session.logs_for_job.assert_called_once()
    assert not sagemaker_session.wait_for_job.called
def test_wait_without_logs(sagemaker_session):
    """wait(False) should poll for job completion without streaming logs.

    NOTE(review): this was previously a byte-for-byte duplicate of
    ``test_wait_with_logs``; the duplicate name meant the earlier definition was
    shadowed and the no-logs code path was never exercised. Renamed and pointed
    at the ``wait(False)`` branch (``_TrainingJob.wait`` accepts a ``logs``
    flag in the SageMaker SDK — confirm against the SDK version pinned here).
    """
    training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    training_job.wait(False)

    # Without log streaming, the session must fall back to plain polling.
    sagemaker_session.wait_for_job.assert_called_once()
    assert not sagemaker_session.logs_for_job.called
def test_algorithm_create_transformer_with_product_id(create_model, sagemaker_session):
    """A marketplace algorithm (one with a ProductId) must not set env on the transformer."""
    describe_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    describe_response['ProductId'] = 'some-product-id'
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=describe_response)

    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    mock_model = Mock()
    mock_model.name = 'my-model'
    create_model.return_value = mock_model

    transformer = estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')

    assert transformer.env is None
def test_estimator_transformer_creation_with_optional_params(sagemaker_session):
    """Every optional transformer() argument should be propagated onto the Transformer."""
    base_name = 'foo'
    estimator = Estimator(image_name=IMAGE_NAME,
                          role=ROLE,
                          train_instance_count=INSTANCE_COUNT,
                          train_instance_type=INSTANCE_TYPE,
                          sagemaker_session=sagemaker_session,
                          base_job_name=base_name)
    estimator.latest_training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    sagemaker_session.create_model_from_job.return_value = JOB_NAME

    options = dict(
        strategy='MultiRecord',
        assemble_with='Line',
        output_path=OUTPUT_PATH,
        output_kms_key='key',
        accept='text/csv',
        tags=TAGS,
        max_concurrent_transforms=1,
        max_payload=6,
        env={'FOO': 'BAR'},
        role=ROLE,
    )
    transformer = estimator.transformer(INSTANCE_COUNT, INSTANCE_TYPE, **options)

    sagemaker_session.create_model_from_job.assert_called_with(JOB_NAME, role=ROLE)

    assert transformer.strategy == options['strategy']
    assert transformer.assemble_with == options['assemble_with']
    assert transformer.output_path == OUTPUT_PATH
    assert transformer.output_kms_key == options['output_kms_key']
    assert transformer.accept == options['accept']
    assert transformer.max_concurrent_transforms == options['max_concurrent_transforms']
    assert transformer.max_payload == options['max_payload']
    assert transformer.env == options['env']
    assert transformer.base_transform_job_name == base_name
    assert transformer.tags == TAGS
def test_framework_transformer_creation_with_optional_params(name_from_image, sagemaker_session):
    """Framework.transformer() should forward all optional args and honor a role override."""
    base_name = 'foo'
    new_role = 'dummy-model-role'
    fw = DummyFramework(entry_point=SCRIPT_PATH,
                        role=ROLE,
                        train_instance_count=INSTANCE_COUNT,
                        train_instance_type=INSTANCE_TYPE,
                        sagemaker_session=sagemaker_session,
                        base_job_name=base_name)
    fw.latest_training_job = _TrainingJob(sagemaker_session, JOB_NAME)

    options = dict(
        strategy='MultiRecord',
        assemble_with='Line',
        output_path=OUTPUT_PATH,
        output_kms_key='key',
        accept='text/csv',
        tags=TAGS,
        max_concurrent_transforms=1,
        max_payload=6,
        env={'FOO': 'BAR'},
        role=new_role,
        model_server_workers=1,
    )
    transformer = fw.transformer(INSTANCE_COUNT, INSTANCE_TYPE, **options)

    # The model must be created with the overriding role, not the estimator's own.
    sagemaker_session.create_model.assert_called_with(MODEL_IMAGE, new_role, MODEL_CONTAINER_DEF)

    assert transformer.strategy == options['strategy']
    assert transformer.assemble_with == options['assemble_with']
    assert transformer.output_path == OUTPUT_PATH
    assert transformer.output_kms_key == options['output_kms_key']
    assert transformer.accept == options['accept']
    assert transformer.max_concurrent_transforms == options['max_concurrent_transforms']
    assert transformer.max_payload == options['max_payload']
    assert transformer.env == options['env']
    assert transformer.base_transform_job_name == base_name
    assert transformer.tags == TAGS
def test_algorithm_create_transformer(create_model, sagemaker_session):
    """AlgorithmEstimator.transformer() should create a model and name the Transformer after it."""
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=DESCRIBE_ALGORITHM_RESPONSE)

    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    mock_model = Mock()
    mock_model.name = 'my-model'
    create_model.return_value = mock_model

    transformer = estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')

    create_model.assert_called()
    assert isinstance(transformer, Transformer)
    assert transformer.model_name == 'my-model'
def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"):
    """Attach to an existing training job.

    Builds an Estimator bound to an already-created training job. Each
    subclass is responsible for implementing
    ``_prepare_init_params_from_job_description()``, since this method
    delegates to it the conversion of a training job description into the
    constructor arguments the class expects.

    After attaching, a job with a Complete status can be ``deploy()``-ed to
    create a SageMaker Endpoint and return a ``Predictor``. If the job is
    still in progress, attach blocks and displays its log messages until the
    job completes.

    Examples:
        >>> my_estimator.fit(wait=False)
        >>> training_job_name = my_estimator.latest_training_job.name
        Later on:
        >>> attached_estimator = Estimator.attach(training_job_name)
        >>> attached_estimator.deploy()

    Args:
        training_job_name (str): The name of the training job to attach to.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
        model_channel_name (str): Name of the channel where pre-trained
            model data will be downloaded (default: 'model'). If no channel
            with the same name exists in the training job, this option will
            be ignored.

    Returns:
        Instance of the calling ``Estimator`` Class with the attached
        training job.
    """
    sagemaker_session = sagemaker_session or Session()

    description = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=training_job_name
    )
    init_params = cls._prepare_init_params_from_job_description(description, model_channel_name)

    # Carry the job's tags over onto the re-created estimator.
    job_tags = sagemaker_session.sagemaker_client.list_tags(
        ResourceArn=description["TrainingJobArn"]
    )["Tags"]
    init_params.update(tags=job_tags)

    estimator = cls(sagemaker_session=sagemaker_session, **init_params)
    estimator.latest_training_job = _TrainingJob(
        sagemaker_session=sagemaker_session, job_name=training_job_name
    )
    estimator._current_job_name = estimator.latest_training_job.name
    estimator.latest_training_job.wait()

    # pylint gets confused thinking that estimator is an EstimatorBase instance, but it actually
    # is a Framework or any of its derived classes. We can safely ignore the no-member errors.
    estimator.uploaded_code = UploadedCode(
        estimator.source_dir, estimator.entry_point  # pylint: disable=no-member
    )
    return estimator
def test_transformer_creation_without_endpoint_type(create_model, sagemaker_session):
    """With no endpoint_type, transformer() should pass None defaults straight through."""
    mock_model = Mock()
    create_model.return_value = mock_model

    tf = TensorFlow(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
    )
    tf.latest_training_job = _TrainingJob(sagemaker_session, "some-job-name")

    tf.transformer(INSTANCE_COUNT, INSTANCE_TYPE)

    create_model.assert_called_with(
        endpoint_type=None,
        model_server_workers=None,
        role=ROLE,
        vpc_config_override="VPC_CONFIG_DEFAULT",
        entry_point=None,
    )
    # Every optional transformer knob defaults to None.
    none_kwargs = dict.fromkeys(
        (
            "accept",
            "assemble_with",
            "env",
            "max_concurrent_transforms",
            "max_payload",
            "output_kms_key",
            "output_path",
            "strategy",
            "tags",
            "volume_kms_key",
        )
    )
    mock_model.transformer.assert_called_with(INSTANCE_COUNT, INSTANCE_TYPE, **none_kwargs)
def test_start_new_not_local_mode_error(sagemaker_session):
    """start_new() must reject file:// inputs when the session is not in local mode."""
    training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    inputs = 'file://mybucket/train'
    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          output_path=OUTPUT_PATH, sagemaker_session=sagemaker_session)

    with pytest.raises(ValueError) as error:
        training_job.start_new(estimator, inputs)
    # Bug fix: ``str(error)`` stringifies the ExceptionInfo wrapper (which
    # includes file/line context, and whose semantics changed in pytest 5.0),
    # so the equality check could never match the raw message. Compare against
    # the exception's own message via ``error.value`` instead.
    assert 'File URIs are supported in local mode only. Please use a S3 URI instead.' == str(error.value)
def test_transformer_creation_without_optional_args(
    name_from_base,
    create_model,
    sagemaker_session,
    tensorflow_inference_version,
    tensorflow_inference_py_version,
):
    """With no optional args, transformer() uses a generated model name and None defaults."""
    if version.Version(tensorflow_inference_version) < version.Version("1.11"):
        pytest.skip(
            "Legacy TF version requires explicit image URI, and "
            "this logic is tested in test_create_model_with_custom_image.")

    model_name = "generated-model-name"
    name_from_base.return_value = model_name
    mock_model = Mock()
    create_model.return_value = mock_model

    base_job_name = "tensorflow"
    tf = TensorFlow(
        entry_point=SCRIPT_PATH,
        framework_version=tensorflow_inference_version,
        py_version=tensorflow_inference_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        base_job_name=base_job_name,
    )
    tf.latest_training_job = _TrainingJob(sagemaker_session, "some-job-name")

    tf.transformer(INSTANCE_COUNT, INSTANCE_TYPE)

    # The model name is derived from the estimator's base job name.
    name_from_base.assert_called_with(base_job_name)
    create_model.assert_called_with(
        role=ROLE,
        vpc_config_override="VPC_CONFIG_DEFAULT",
        entry_point=None,
        enable_network_isolation=False,
        name=model_name,
    )
    none_kwargs = dict.fromkeys(
        (
            "accept",
            "assemble_with",
            "env",
            "max_concurrent_transforms",
            "max_payload",
            "output_kms_key",
            "output_path",
            "strategy",
            "tags",
            "volume_kms_key",
        )
    )
    mock_model.transformer.assert_called_with(INSTANCE_COUNT, INSTANCE_TYPE, **none_kwargs)
def test_start_new(sagemaker_session):
    """start_new() should call Session.train exactly once, forwarding hyperparameters."""
    hyperparameters = {'mock': 'hyperparameters'}
    inputs = 's3://mybucket/train'

    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          output_path=OUTPUT_PATH,
                          sagemaker_session=sagemaker_session,
                          hyperparameters=hyperparameters)
    training_job = _TrainingJob(sagemaker_session, JOB_NAME)

    started = training_job.start_new(estimator, inputs)

    sagemaker_session.train.assert_called_once()
    _, train_kwargs = sagemaker_session.train.call_args
    assert train_kwargs['hyperparameters'] == hyperparameters
    assert started.sagemaker_session == sagemaker_session
def test_estimator_transformer_creation(sagemaker_session):
    """transformer() with no options should build a Transformer off the training job's model."""
    estimator = Estimator(image_name=IMAGE_NAME,
                          role=ROLE,
                          train_instance_count=INSTANCE_COUNT,
                          train_instance_type=INSTANCE_TYPE,
                          sagemaker_session=sagemaker_session)
    estimator.latest_training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    sagemaker_session.create_model_from_job.return_value = JOB_NAME

    transformer = estimator.transformer(INSTANCE_COUNT, INSTANCE_TYPE)

    # No role override was given, so None is forwarded.
    sagemaker_session.create_model_from_job.assert_called_with(JOB_NAME, role=None)

    assert isinstance(transformer, Transformer)
    assert transformer.sagemaker_session == sagemaker_session
    assert (transformer.instance_count, transformer.instance_type) == (INSTANCE_COUNT, INSTANCE_TYPE)
    assert transformer.model_name == JOB_NAME
    assert transformer.tags is None
def test_framework_transformer_creation(name_from_image, sagemaker_session):
    """Framework.transformer() defaults: model named from image, empty env, no tags."""
    fw = DummyFramework(entry_point=SCRIPT_PATH,
                        role=ROLE,
                        train_instance_count=INSTANCE_COUNT,
                        train_instance_type=INSTANCE_TYPE,
                        sagemaker_session=sagemaker_session)
    fw.latest_training_job = _TrainingJob(sagemaker_session, JOB_NAME)

    transformer = fw.transformer(INSTANCE_COUNT, INSTANCE_TYPE)

    name_from_image.assert_called_with(MODEL_IMAGE)
    sagemaker_session.create_model.assert_called_with(MODEL_IMAGE, ROLE, MODEL_CONTAINER_DEF)

    assert isinstance(transformer, Transformer)
    assert transformer.sagemaker_session == sagemaker_session
    assert (transformer.instance_count, transformer.instance_type) == (INSTANCE_COUNT, INSTANCE_TYPE)
    assert transformer.model_name == MODEL_IMAGE
    assert transformer.tags is None
    assert transformer.env == {}
def test_transformer_creation_with_optional_args_v2(
        create_model, sagemaker_session, tensorflow_inference_version,
        tensorflow_inference_py_version):
    """Every optional transformer() arg should reach create_model / model.transformer.

    NOTE(review): renamed — this test previously shared the name
    ``test_transformer_creation_with_optional_args`` with a later definition
    in this file, so pytest only ever collected the later one and this body
    never ran. The later (legacy-API) test is left under the original name.
    """
    if version.Version(tensorflow_inference_version) < version.Version("1.11"):
        pytest.skip(
            "Legacy TF version requires explicit image URI, and "
            "this logic is tested in test_create_model_with_custom_image.")

    mock_model = Mock()
    create_model.return_value = mock_model

    tf = TensorFlow(
        entry_point=SCRIPT_PATH,
        framework_version=tensorflow_inference_version,
        py_version=tensorflow_inference_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
    )
    tf.latest_training_job = _TrainingJob(sagemaker_session, "some-job-name")

    kms_key = "kms"
    # Arguments consumed by create_model().
    model_kwargs = dict(
        role="role",
        vpc_config_override={"Subnets": ["1234"], "SecurityGroupIds": ["5678"]},
        entry_point=SERVING_SCRIPT_FILE,
        enable_network_isolation=True,
        name="model-name",
    )
    # Arguments forwarded to model.transformer().
    transform_kwargs = dict(
        strategy="SingleRecord",
        assemble_with="Line",
        output_path="s3://{}/batch-output".format(BUCKET_NAME),
        output_kms_key=kms_key,
        accept="text/bytes",
        env={"foo": "bar"},
        max_concurrent_transforms=3,
        max_payload=100,
        tags={"Key": "foo", "Value": "bar"},
        volume_kms_key=kms_key,
    )

    tf.transformer(INSTANCE_COUNT, INSTANCE_TYPE, **transform_kwargs, **model_kwargs)

    create_model.assert_called_with(**model_kwargs)
    mock_model.transformer.assert_called_with(INSTANCE_COUNT, INSTANCE_TYPE, **transform_kwargs)
def test_transformer_creation_with_optional_args(create_model, sagemaker_session):
    """Optional args (incl. endpoint_type and model_server_workers) should be forwarded."""
    mock_model = Mock()
    create_model.return_value = mock_model

    tf = TensorFlow(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
    )
    tf.latest_training_job = _TrainingJob(sagemaker_session, "some-job-name")

    kms_key = "kms"
    # Arguments consumed by create_model().
    model_kwargs = dict(
        model_server_workers=2,
        role="role",
        vpc_config_override={"Subnets": ["1234"], "SecurityGroupIds": ["5678"]},
        endpoint_type="tensorflow-serving",
        entry_point=SERVING_SCRIPT_FILE,
        enable_network_isolation=True,
    )
    # Arguments forwarded to model.transformer().
    transform_kwargs = dict(
        strategy="SingleRecord",
        assemble_with="Line",
        output_path="s3://{}/batch-output".format(BUCKET_NAME),
        output_kms_key=kms_key,
        accept="text/bytes",
        env={"foo": "bar"},
        max_concurrent_transforms=3,
        max_payload=100,
        tags={"Key": "foo", "Value": "bar"},
        volume_kms_key=kms_key,
    )

    tf.transformer(INSTANCE_COUNT, INSTANCE_TYPE, **transform_kwargs, **model_kwargs)

    create_model.assert_called_with(**model_kwargs)
    mock_model.transformer.assert_called_with(INSTANCE_COUNT, INSTANCE_TYPE, **transform_kwargs)