def test_algorithm_hyperparameter_categorical_range(sagemaker_session):
    """Check that a categorical hyperparameter only accepts its listed values.

    The mocked algorithm description declares ``hp1`` as a categorical
    hyperparameter whose range is ``'TF'`` and ``'MXNet'``. Setting either of
    those must not raise; any other value, including a differently-cased
    spelling, must raise ``ValueError``.
    """
    categorical_spec = {
        'Description': 'A continuous hyperparameter',
        'Type': 'Categorical',
        'Name': 'hp1',
        'Range': {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}},
        'IsTunable': True,
        'IsRequired': False,
        'DefaultValue': '100',
    }
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_description['TrainingSpecification']['SupportedHyperParameters'] = [categorical_spec]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=algo_description)

    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # Values that appear in the declared range are accepted.
    for accepted in ('MXNet', 'TF'):
        estimator.set_hyperparameters(hp1=accepted)

    # Values outside the range are rejected; 'MxNET' differs from 'MXNet' only by case.
    for rejected in ('Chainer', 'MxNET'):
        with pytest.raises(ValueError):
            estimator.set_hyperparameters(hp1=rejected)
def test_algorithm_create_transformer(create_model, sagemaker_session):
    """Check that ``transformer()`` returns a Transformer named after the created model.

    ``create_model`` is used as a mock whose return value supplies the model;
    it is presumably injected by a patch decorator or fixture that is not
    visible in this view (confirm against the surrounding file).
    """
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=DESCRIBE_ALGORITHM_RESPONSE
    )

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    # A transformer can only be built once a training job is attached.
    estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    fake_model = Mock()
    fake_model.name = 'my-model'
    create_model.return_value = fake_model

    transformer = estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')

    assert isinstance(transformer, Transformer)
    create_model.assert_called()
    assert transformer.model_name == 'my-model'
def test_algorithm_hyperparameter_categorical_range(session):
    """Verify range validation for a categorical hyperparameter.

    ``hp1`` is declared with the categorical values ``"TF"`` and ``"MXNet"``.
    The test sets both allowed values and then expects ``ValueError`` for an
    unlisted value and for a case variant of an allowed one.
    """
    allowed_values = {"CategoricalParameterRangeSpecification": {"Values": ["TF", "MXNet"]}}
    supported = [
        {
            "Description": "A continuous hyperparameter",
            "Type": "Categorical",
            "Name": "hp1",
            "Range": allowed_values,
            "IsTunable": True,
            "IsRequired": False,
            "DefaultValue": "100",
        }
    ]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedHyperParameters"] = supported
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Both declared values are accepted.
    estimator.set_hyperparameters(hp1="MXNet")
    estimator.set_hyperparameters(hp1="TF")

    # An unlisted framework name is rejected.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp1="Chainer")

    # So is a value that differs from an allowed one only by letter case.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp1="MxNET")
def test_algorithm_create_transformer_with_product_id(create_model, sagemaker_session):
    """Check that the transformer has no ``env`` when the algorithm has a ProductId.

    The mocked algorithm description is given a ``ProductId`` entry, and the
    transformer built from the estimator is expected to have ``env`` set to
    ``None``. ``create_model`` is used as a mock; it is presumably supplied by a
    patch decorator or fixture outside this view (confirm).
    """
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['ProductId'] = 'some-product-id'
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    fake_model = Mock()
    fake_model.name = 'my-model'
    create_model.return_value = fake_model

    transformer = estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')

    assert transformer.env is None
def test_algorithm_train_instance_types_valid_instance_types(session):
    """Check that an estimator can be built for every supported instance type.

    The mocked algorithm description lists two supported training instance
    types; constructing an estimator with each of them must not raise.
    """
    supported_types = ["ml.m4.xlarge", "ml.m5.2xlarge"]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedTrainingInstanceTypes"] = supported_types
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    # One construction per supported type, in the order they are listed.
    for supported_type in supported_types:
        AlgorithmEstimator(
            algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
            role="SageMakerRole",
            instance_type=supported_type,
            instance_count=1,
            sagemaker_session=session,
        )
def test_algorithm_train_instance_types_valid_instance_types(sagemaker_session):
    """Verify estimator construction succeeds for each supported instance type.

    The algorithm description is mocked to support ``'ml.m4.xlarge'`` and
    ``'ml.m5.2xlarge'``; an estimator is constructed once with each.
    """
    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = described['TrainingSpecification']
    training_spec['SupportedTrainingInstanceTypes'] = ['ml.m4.xlarge', 'ml.m5.2xlarge']
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    # First supported type.
    AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # Second supported type.
    AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m5.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
def test_prepare_for_training_with_name_based_on_algorithm(sagemaker_session):
    """Check that the generated job name contains the algorithm name from the ARN."""
    algorithm_name = 'scikit-decision-trees-1542410022'
    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-west-2:1234:algorithm/scikit-decision-trees-1542410022',
        role='some_image',
        train_instance_count=1,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
    )

    estimator._prepare_for_training()

    # The trailing segment of the ARN must appear in the job name.
    assert algorithm_name in estimator._current_job_name
def test_algorithm_enable_network_isolation_no_product_id(session):
    """Check that network isolation is off for the unmodified algorithm description.

    The mock returns ``DESCRIBE_ALGORITHM_RESPONSE`` as-is (per the test name it
    is expected to carry no ``ProductId`` -- the constant is defined outside
    this view).
    """
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    assert estimator.enable_network_isolation() is False
def test_algorithm_create_transformer_without_completed_training_job(session):
    """Check that ``transformer()`` fails when no training job is attached.

    The estimator is created but never given a ``latest_training_job``, so
    requesting a transformer must raise ``RuntimeError`` with a message that
    explains no finished training job was found.
    """
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)
    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    with pytest.raises(RuntimeError) as error:
        estimator.transformer(instance_count=1, instance_type="ml.m4.xlarge")

    # The check must live outside the ``raises`` block: statements placed after
    # the raising call inside the block never execute. Inspect ``error.value``
    # (the exception itself) rather than the ExceptionInfo wrapper, whose string
    # form is not guaranteed to be the exception message.
    assert "No finished training job found associated with this estimator" in str(error.value)
def test_algorithm_hyperparameter_continuous_range_valid_range(session):
    """Check that values inside a continuous range are accepted.

    ``max_leaf_nodes`` is declared as a continuous hyperparameter with range
    ``"0.0"`` to ``"1.0"``. Both bounds (as int and as float) and an interior
    value are set without expecting any exception.
    """
    continuous_spec = {
        "Description": "A continuous hyperparameter",
        "Type": "Continuous",
        "Name": "max_leaf_nodes",
        "Range": {
            "ContinuousParameterRangeSpecification": {
                "MinValue": "0.0",
                "MaxValue": "1.0",
            }
        },
        "IsTunable": True,
        "IsRequired": False,
        "DefaultValue": "100",
    }
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedHyperParameters"] = [continuous_spec]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Lower bound (int), upper bound (float), midpoint, upper bound (int).
    for in_range in (0, 1.0, 0.5, 1):
        estimator.set_hyperparameters(max_leaf_nodes=in_range)
def test_algorithm_hyperparameter_continuous_range_valid_range(sagemaker_session):
    """Verify a continuous hyperparameter accepts its bounds and interior values.

    The declared range for ``max_leaf_nodes`` is ``'0.0'`` to ``'1.0'``; the test
    sets 0, 1.0, 0.5 and 1 and expects none of them to raise.
    """
    value_range = {
        'ContinuousParameterRangeSpecification': {'MinValue': '0.0', 'MaxValue': '1.0'}
    }
    supported = [
        {
            'Description': 'A continuous hyperparameter',
            'Type': 'Continuous',
            'Name': 'max_leaf_nodes',
            'Range': value_range,
            'IsTunable': True,
            'IsRequired': False,
            'DefaultValue': '100',
        }
    ]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['SupportedHyperParameters'] = supported
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    estimator.set_hyperparameters(max_leaf_nodes=0)  # lower bound as int
    estimator.set_hyperparameters(max_leaf_nodes=1.0)  # upper bound as float
    estimator.set_hyperparameters(max_leaf_nodes=0.5)  # interior value
    estimator.set_hyperparameters(max_leaf_nodes=1)  # upper bound as int
def test_algorithm_enable_network_isolation_with_product_id(session):
    """Check that network isolation is on when the algorithm has a ProductId."""
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["ProductId"] = "some-product-id"
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    assert estimator.enable_network_isolation() is True
def test_algorithm_required_hyperparameters_are_provided(session):
    """Check that setting every required hyperparameter at once is accepted.

    Three hyperparameters are declared as required: a categorical one with an
    explicit range, a categorical one without a range, and a free-text one.
    Supplying all three in a single call must not raise.
    """
    ranged_categorical = {
        "Description": "A categorical hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {"CategoricalParameterRangeSpecification": {"Values": ["TF", "MXNet"]}},
        "IsTunable": True,
        "IsRequired": True,
    }
    unranged_categorical = {
        "Name": "hp2",
        "Description": "A categorical hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }
    free_text = {
        "Name": "free_text_hp1",
        "Description": "You can write anything here",
        "Type": "FreeText",
        "IsTunable": False,
        "IsRequired": True,
    }

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedHyperParameters"] = [
        ranged_categorical,
        unranged_categorical,
        free_text,
    ]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Every required hyperparameter is supplied in one call.
    estimator.set_hyperparameters(hp1="TF", hp2="TF2", free_text_hp1="Hello!")
def test_algorithm_distributed_training_validation(session):
    """Check instance-count validation against ``SupportsDistributedTraining``.

    With an algorithm that supports distributed training, estimators with one
    and with two instances are both constructed without error. With an
    algorithm that does not, requesting two instances must raise ``ValueError``.
    """

    def build_estimator(instance_type, instance_count):
        # All constructions in this test share the same ARN, role and session.
        return AlgorithmEstimator(
            algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
            role="SageMakerRole",
            train_instance_type=instance_type,
            train_instance_count=instance_count,
            sagemaker_session=session,
        )

    supports_distributed = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    supports_distributed["TrainingSpecification"]["SupportsDistributedTraining"] = True

    single_instance_only = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    single_instance_only["TrainingSpecification"]["SupportsDistributedTraining"] = False

    # A distributed-capable algorithm accepts a single instance and several.
    session.sagemaker_client.describe_algorithm = Mock(return_value=supports_distributed)
    for instance_count in (1, 2):
        build_estimator("ml.m4.xlarge", instance_count)

    # A single-instance algorithm rejects a multi-instance request.
    session.sagemaker_client.describe_algorithm = Mock(return_value=single_instance_only)
    with pytest.raises(ValueError):
        build_estimator("ml.m5.2xlarge", 2)
def test_algorithm_required_hyperparameters_are_provided(sagemaker_session):
    """Verify that supplying all required hyperparameters does not raise.

    The mocked algorithm declares ``hp1``, ``hp2`` and ``free_text_hp1`` as
    required; the test sets all three together.
    """
    hp1_range = {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}}
    required_specs = [
        {
            'Description': 'A categorical hyperparameter',
            'Type': 'Categorical',
            'Name': 'hp1',
            'Range': hp1_range,
            'IsTunable': True,
            'IsRequired': True,
        },
        {
            'Name': 'hp2',
            'Description': 'A categorical hyperparameter',
            'Type': 'Categorical',
            'IsTunable': False,
            'IsRequired': True,
        },
        {
            'Name': 'free_text_hp1',
            'Description': 'You can write anything here',
            'Type': 'FreeText',
            'IsTunable': False,
            'IsRequired': True,
        },
    ]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['SupportedHyperParameters'] = required_specs
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # All three required hyperparameters are given values.
    provided = {'hp1': 'TF', 'hp2': 'TF2', 'free_text_hp1': 'Hello!'}
    estimator.set_hyperparameters(**provided)
def test_algorithm_supported_with_spot_instances(session):
    """Check that an estimator can be constructed with spot instances enabled."""
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        instance_type="ml.m4.xlarge",
        instance_count=1,
        use_spot_instances=True,
        max_wait=500,
        sagemaker_session=session,
    )

    # Construction succeeded and produced a truthy estimator object.
    assert estimator
def test_algorithm_hyperparameter_integer_range_invalid_range(session):
    """Check that integers just outside the declared range are rejected.

    ``max_leaf_nodes`` is declared as an integer hyperparameter with range
    ``"1"`` to ``"100000"``; one below the minimum and one above the maximum
    must each raise ``ValueError``.
    """
    integer_spec = {
        "Description": "Grow a tree with max_leaf_nodes in best-first fashion.",
        "Type": "Integer",
        "Name": "max_leaf_nodes",
        "Range": {
            "IntegerParameterRangeSpecification": {"MinValue": "1", "MaxValue": "100000"}
        },
        "IsTunable": True,
        "IsRequired": False,
        "DefaultValue": "100",
    }
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedHyperParameters"] = [integer_spec]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        instance_type="ml.m4.2xlarge",
        instance_count=1,
        sagemaker_session=session,
    )

    # 0 is below MinValue; 100001 is above MaxValue.
    for out_of_range in (0, 100001):
        with pytest.raises(ValueError):
            estimator.set_hyperparameters(max_leaf_nodes=out_of_range)
def test_algorithm_no_required_hyperparameters(sagemaker_session):
    """Check construction when the algorithm declares no hyperparameters at all.

    The ``SupportedHyperParameters`` entry is removed from the mocked algorithm
    description, and building an estimator from it must still succeed.
    """
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    # Drop the hyperparameter declarations entirely (KeyError if already absent).
    described['TrainingSpecification'].pop('SupportedHyperParameters')
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    # With nothing declared, no hyperparameter can be required, so
    # construction is expected to work.
    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    assert estimator
def test_algorithm_encrypt_inter_container_traffic(session):
    """Check that the ``encrypt_inter_container_traffic`` flag is kept on the estimator."""
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['encrypt_inter_container_traffic'] = True
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=session,
        encrypt_inter_container_traffic=True,
    )

    # The value passed to the constructor is exposed as an attribute.
    assert estimator.encrypt_inter_container_traffic is True
def test_algorithm_required_hyperparameters_not_provided(session):
    """Check that missing required hyperparameters are reported as ``ValueError``.

    Both ``hp1`` and ``hp2`` are declared as required. Setting only ``hp2`` must
    raise, and so must calling ``fit`` while required hyperparameters are unset.
    """
    hp1_spec = {
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {"CategoricalParameterRangeSpecification": {"Values": ["TF", "MXNet"]}},
        "IsTunable": True,
        "IsRequired": True,
    }
    hp2_spec = {
        "Name": "hp2",
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["SupportedHyperParameters"] = [hp1_spec, hp2_spec]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Only hp2 is supplied, so the required hp1 is missing.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2="TF2")

    # fit() must also fail while required hyperparameters are unset; this covers
    # callers that never invoke set_hyperparameters() explicitly.
    with pytest.raises(ValueError):
        estimator.fit({"training": "s3://some/place"})
def test_algorithm_trainining_channels_with_invalid_channels(sagemaker_session):
    """Check that ``fit`` rejects inputs that do not match the declared channels.

    The mocked algorithm declares a required ``training`` channel and an
    optional ``validation`` channel. Omitting the required channel and passing
    an undeclared channel are both expected to raise ``ValueError``.
    """
    required_training = {
        'Name': 'training',
        'Description': 'Input channel that provides training data',
        'IsRequired': True,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }
    optional_validation = {
        'Name': 'validation',
        'Description': 'Input channel that provides validation data',
        'IsRequired': False,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['TrainingChannels'] = [
        required_training,
        optional_validation,
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # The required 'training' channel is missing.
    validation_only = {'validation': 's3://some/thing'}
    with pytest.raises(ValueError):
        estimator.fit(validation_only)

    # 'training2' is not a channel the algorithm declares.
    with_unknown_channel = {
        'training': 's3://some/data',
        'training2': 's3://some/other/data',
    }
    with pytest.raises(ValueError):
        estimator.fit(with_unknown_channel)
def test_algorithm_required_hyperparameters_not_provided(sagemaker_session):
    """Verify ``ValueError`` is raised when a required hyperparameter is missing.

    ``hp1`` and ``hp2`` are both required. The test supplies only ``hp2`` and
    then calls ``fit``; each step is expected to raise.
    """
    hp1_values = {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}}
    required_specs = [
        {
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'Name': 'hp1',
            'Range': hp1_values,
            'IsTunable': True,
            'IsRequired': True,
        },
        {
            'Name': 'hp2',
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'IsTunable': False,
            'IsRequired': True,
        },
    ]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['SupportedHyperParameters'] = required_specs
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # hp1 is required but left out of this call.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2='TF2')

    # Fitting without the required hyperparameters set must fail as well,
    # covering the case where set_hyperparameters() is never called explicitly.
    with pytest.raises(ValueError):
        estimator.fit({'training': 's3://some/place'})
def test_algorithm_trainining_channels_with_invalid_channels(session):
    """Verify ``fit`` raises for missing-required and undeclared input channels.

    Declared channels: ``training`` (required) and ``validation`` (optional).
    """
    declared_channels = [
        {
            "Name": "training",
            "Description": "Input channel that provides training data",
            "IsRequired": True,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
        {
            "Name": "validation",
            "Description": "Input channel that provides validation data",
            "IsRequired": False,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
    ]
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["TrainingChannels"] = declared_channels
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Supplying only the optional channel leaves the required one unset.
    with pytest.raises(ValueError):
        estimator.fit({"validation": "s3://some/thing"})

    # An extra channel name that the algorithm does not declare is rejected too.
    with pytest.raises(ValueError):
        estimator.fit(
            {
                "training": "s3://some/data",
                "training2": "s3://some/other/data",
            }
        )
def test_algorithm_trainining_channels_with_expected_channels(sagemaker_session):
    """Check that ``fit`` accepts inputs matching the declared channels.

    The mocked algorithm declares a required ``training`` channel and an
    optional ``validation`` channel. Fitting with both, and with only the
    required one, is expected to complete without raising.
    """
    required_training = {
        'Name': 'training',
        'Description': 'Input channel that provides training data',
        'IsRequired': True,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }
    optional_validation = {
        'Name': 'validation',
        'Description': 'Input channel that provides validation data',
        'IsRequired': False,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['TrainingChannels'] = [
        required_training,
        optional_validation,
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # Both declared channels supplied.
    both_channels = {'training': 's3://some/place', 'validation': 's3://some/other'}
    estimator.fit(both_channels)

    # Only the required channel supplied; the optional one may be omitted.
    estimator.fit({'training': 's3://some/place'})
def test_algorithm_trainining_channels_with_expected_channels(session):
    """Verify ``fit`` works with the declared required and optional channels.

    Declared channels: ``training`` (required) and ``validation`` (optional).
    """
    declared_channels = [
        {
            "Name": "training",
            "Description": "Input channel that provides training data",
            "IsRequired": True,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
        {
            "Name": "validation",
            "Description": "Input channel that provides validation data",
            "IsRequired": False,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
    ]
    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described["TrainingSpecification"]["TrainingChannels"] = declared_channels
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Training plus validation: both are declared, so this should be accepted.
    estimator.fit(
        {
            "training": "s3://some/place",
            "validation": "s3://some/other",
        }
    )

    # Training alone: validation is optional, so this should be accepted too.
    estimator.fit({"training": "s3://some/place"})
def test_algorithm_hyperparameter_integer_range_invalid_range(session):
    """Verify out-of-range integers raise ``ValueError``.

    The declared range for ``max_leaf_nodes`` is ``'1'`` to ``'100000'``; the
    test tries 0 and 100001.
    """
    bounds = {'IntegerParameterRangeSpecification': {'MinValue': '1', 'MaxValue': '100000'}}
    supported = [
        {
            'Description': 'Grow a tree with max_leaf_nodes in best-first fashion.',
            'Type': 'Integer',
            'Name': 'max_leaf_nodes',
            'Range': bounds,
            'IsTunable': True,
            'IsRequired': False,
            'DefaultValue': '100',
        }
    ]

    described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described['TrainingSpecification']['SupportedHyperParameters'] = supported
    session.sagemaker_client.describe_algorithm = Mock(return_value=described)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=session,
    )

    # One below the declared minimum.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(max_leaf_nodes=0)

    # One above the declared maximum.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(max_leaf_nodes=100001)
def test_algorithm_supported_input_mode_with_bad_input_types(sagemaker_session):
    """Check that a mismatched input mode is rejected at construction time.

    Two algorithm descriptions are mocked in turn: one whose ``training``
    channel supports only ``'File'`` and one whose ``training`` channel
    supports only ``'Pipe'``. Constructing an estimator with the other mode
    must raise ``ValueError`` in both cases.
    """

    def describe_with_training_modes(training_modes):
        # Build an algorithm description whose 'training' channel supports
        # exactly ``training_modes``; 'validation' always supports both modes.
        description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
        description['TrainingSpecification']['TrainingChannels'] = [
            {
                'Name': 'training',
                'Description': 'Input channel that provides training data',
                'IsRequired': True,
                'SupportedContentTypes': ['text/csv'],
                'SupportedCompressionTypes': ['None'],
                'SupportedInputModes': training_modes,
            },
            {
                'Name': 'validation',
                'Description': 'Input channel that provides validation data',
                'IsRequired': False,
                'SupportedContentTypes': ['text/csv'],
                'SupportedCompressionTypes': ['None'],
                'SupportedInputModes': ['File', 'Pipe'],
            },
        ]
        return description

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'

    # File-only algorithm, estimator explicitly asks for Pipe mode.
    file_only = describe_with_training_modes(['File'])
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=file_only)
    with pytest.raises(ValueError):
        AlgorithmEstimator(
            algorithm_arn=arn,
            role='SageMakerRole',
            train_instance_type='ml.m4.xlarge',
            train_instance_count=1,
            input_mode='Pipe',
            sagemaker_session=sagemaker_session,
        )

    # Pipe-only algorithm, estimator built without input_mode (the original
    # test treats this as a File-mode estimator).
    pipe_only = describe_with_training_modes(['Pipe'])
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=pipe_only)
    with pytest.raises(ValueError):
        AlgorithmEstimator(
            algorithm_arn=arn,
            role='SageMakerRole',
            train_instance_type='ml.m4.xlarge',
            train_instance_count=1,
            sagemaker_session=sagemaker_session,
        )
def test_algorithm_attach_from_hyperparameter_tuning():
    """Check ``AlgorithmEstimator.attach`` for a training job that belongs to a tuning job.

    The mocked ``describe_training_job`` response carries a ``TuningJobArn`` and
    a ``_tuning_objective_metric`` hyperparameter. The attached estimator is
    expected to report only the other two hyperparameters, and its ARN, role,
    instance settings, volume size, input mode and session must match the
    mocked job description.
    """
    job_name = "training-job-that-is-part-of-a-tuning-job"
    algo_arn = "arn:aws:sagemaker:us-east-2:000000000000:algorithm/scikit-decision-trees"
    role_arn = "arn:aws:iam::123412341234:role/SageMakerRole"
    instance_count = 1
    instance_type = "ml.m4.xlarge"
    train_volume_size = 30
    input_mode = "File"

    training_channel = {
        "ChannelName": "training",
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://sagemaker-us-east-2-123412341234/input/training.csv",
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "CompressionType": "None",
        "RecordWrapperType": "None",
    }
    job_description = {
        "TrainingJobName": job_name,
        "TrainingJobArn": "arn:aws:sagemaker:us-east-2:123412341234:training-job/%s" % job_name,
        "TuningJobArn": "arn:aws:sagemaker:us-east-2:123412341234:hyper-parameter-tuning-job/%s"
        % job_name,
        "ModelArtifacts": {
            "S3ModelArtifacts": "s3://sagemaker-us-east-2-123412341234/output/model.tar.gz"
        },
        "TrainingJobOutput": {
            "S3TrainingJobOutput": "s3://sagemaker-us-east-2-123412341234/output/output.tar.gz"
        },
        "TrainingJobStatus": "Succeeded",
        "HyperParameters": {
            "_tuning_objective_metric": "validation:accuracy",
            "max_leaf_nodes": 1,
            "free_text_hp1": "foo",
        },
        "AlgorithmSpecification": {"AlgorithmName": algo_arn, "TrainingInputMode": input_mode},
        "MetricDefinitions": [
            {"Name": "validation:accuracy", "Regex": "validation-accuracy: (\\S+)"}
        ],
        "RoleArn": role_arn,
        "InputDataConfig": [training_channel],
        "OutputDataConfig": {
            "KmsKeyId": "",
            "S3OutputPath": "s3://sagemaker-us-east-2-123412341234/output",
            "RemoveJobNameFromS3OutputPath": False,
        },
        "ResourceConfig": {
            "InstanceType": instance_type,
            "InstanceCount": instance_count,
            "VolumeSizeInGB": train_volume_size,
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
    }

    session = Mock()
    client = session.sagemaker_client
    client.list_tags.return_value = {"Tags": []}
    client.describe_algorithm.return_value = DESCRIBE_ALGORITHM_RESPONSE
    client.describe_training_job.return_value = job_description

    estimator = AlgorithmEstimator.attach(job_name, sagemaker_session=session)

    # The tuning-objective entry must not show up among the hyperparameters.
    expected_hyperparameters = {"max_leaf_nodes": 1, "free_text_hp1": "foo"}
    assert estimator.hyperparameters() == expected_hyperparameters

    assert estimator.algorithm_arn == algo_arn
    assert estimator.role == role_arn
    assert estimator.train_instance_count == instance_count
    assert estimator.train_instance_type == instance_type
    assert estimator.train_volume_size == train_volume_size
    assert estimator.input_mode == input_mode
    assert estimator.sagemaker_session == session
def test_algorithm_supported_input_mode_with_bad_input_types(session):
    """Verify the estimator raises when its input mode is unsupported by the algorithm.

    First a description whose ``training`` channel supports only ``"File"`` is
    mocked and a ``"Pipe"`` estimator is requested; then a description whose
    ``training`` channel supports only ``"Pipe"`` is mocked and an estimator
    without an explicit input mode is requested. Both must raise ``ValueError``.
    """

    def describe_with_training_modes(training_modes):
        # Build an algorithm description whose "training" channel supports
        # exactly ``training_modes``; "validation" always supports both modes.
        description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
        description["TrainingSpecification"]["TrainingChannels"] = [
            {
                "Name": "training",
                "Description": "Input channel that provides training data",
                "IsRequired": True,
                "SupportedContentTypes": ["text/csv"],
                "SupportedCompressionTypes": ["None"],
                "SupportedInputModes": training_modes,
            },
            {
                "Name": "validation",
                "Description": "Input channel that provides validation data",
                "IsRequired": False,
                "SupportedContentTypes": ["text/csv"],
                "SupportedCompressionTypes": ["None"],
                "SupportedInputModes": ["File", "Pipe"],
            },
        ]
        return description

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"

    # Pipe-mode estimator against a File-only algorithm.
    file_only = describe_with_training_modes(["File"])
    session.sagemaker_client.describe_algorithm = Mock(return_value=file_only)
    with pytest.raises(ValueError):
        AlgorithmEstimator(
            algorithm_arn=arn,
            role="SageMakerRole",
            train_instance_type="ml.m4.xlarge",
            train_instance_count=1,
            input_mode="Pipe",
            sagemaker_session=session,
        )

    # Estimator without input_mode (treated by the original test as File mode)
    # against a Pipe-only algorithm.
    pipe_only = describe_with_training_modes(["Pipe"])
    session.sagemaker_client.describe_algorithm = Mock(return_value=pipe_only)
    with pytest.raises(ValueError):
        AlgorithmEstimator(
            algorithm_arn=arn,
            role="SageMakerRole",
            train_instance_type="ml.m4.xlarge",
            train_instance_count=1,
            sagemaker_session=session,
        )